From a683dee5880a63d3798122941be65d30c688f030 Mon Sep 17 00:00:00 2001
From: Roshani Nagmote <roshaninagmote2@gmail.com>
Date: Wed, 7 Nov 2018 10:55:45 -0800
Subject: [PATCH 1/4] Doc fixes

---
 docs/api/python/ndarray/sparse.md                |  2 +-
 python/mxnet/gluon/model_zoo/vision/mobilenet.py | 14 +++++++-------
 python/mxnet/ndarray/sparse.py                   |  2 +-
 python/mxnet/optimizer/optimizer.py              | 13 ++++++-------
 python/mxnet/recordio.py                         | 16 ++++++++--------
 python/mxnet/visualization.py                    |  8 ++++----
 src/operator/tensor/matrix_op.cc                 |  6 +++---
 7 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/docs/api/python/ndarray/sparse.md b/docs/api/python/ndarray/sparse.md
index 2ade059a70c9..acd5d2d4acc6 100644
--- a/docs/api/python/ndarray/sparse.md
+++ b/docs/api/python/ndarray/sparse.md
@@ -582,7 +582,7 @@ We summarize the interface for each class in the following sections.
     :members: shape, context, dtype, stype, data, indices, indptr, copy, copyto, as_in_context, asscipy, asnumpy, asscalar, astype, tostype, slice, wait_to_read, zeros_like, round, rint, fix, floor, ceil, trunc, sin, tan, arcsin, arctan, degrees, radians, sinh, tanh, arcsinh, arctanh, expm1, log1p, sqrt, square, __neg__, sum, mean, norm, square, __getitem__, __setitem__, check_format, abs, clip, sign
 
 .. autoclass:: mxnet.ndarray.sparse.RowSparseNDArray
-    :members: shape, context, dtype, stype, data, indices, copy, copyto, as_in_context, asnumpy, asscalar, astype, tostype, wait_to_read, zeros_like, round, rint, fix, floor, ceil, trunc, sin, tan, arcsin, arctan, degrees, radians, sinh, tanh, arcsinh, arctanh, expm1, log1p, sqrt, square, __negative__, norm, __getitem__, __setitem__, check_format, retain, abs, clip, sign
+    :members: shape, context, dtype, stype, data, indices, copy, copyto, as_in_context, asnumpy, asscalar, astype, tostype, wait_to_read, zeros_like, round, rint, fix, floor, ceil, trunc, sin, tan, arcsin, arctan, degrees, radians, sinh, tanh, arcsinh, arctanh, expm1, log1p, sqrt, square, norm, __getitem__, __setitem__, check_format, retain, abs, clip, sign
 
 .. automodule:: mxnet.ndarray.sparse
     :members:
diff --git a/python/mxnet/gluon/model_zoo/vision/mobilenet.py b/python/mxnet/gluon/model_zoo/vision/mobilenet.py
index 1a84e05af208..88610571252e 100644
--- a/python/mxnet/gluon/model_zoo/vision/mobilenet.py
+++ b/python/mxnet/gluon/model_zoo/vision/mobilenet.py
@@ -62,7 +62,7 @@ def _add_conv_dw(out, dw_channels, channels, stride, relu6=False):
 class LinearBottleneck(nn.HybridBlock):
     r"""LinearBottleneck used in MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -138,7 +138,7 @@ def hybrid_forward(self, F, x):
 class MobileNetV2(nn.HybridBlock):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -223,7 +223,7 @@ def get_mobilenet_v2(multiplier, pretrained=False, ctx=cpu(),
                      root=os.path.join(base.data_dir(), 'models'), **kwargs):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -269,7 +269,7 @@ def mobilenet1_0(**kwargs):
 def mobilenet_v2_1_0(**kwargs):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -300,7 +300,7 @@ def mobilenet0_75(**kwargs):
 def mobilenet_v2_0_75(**kwargs):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -331,7 +331,7 @@ def mobilenet0_5(**kwargs):
 def mobilenet_v2_0_5(**kwargs):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
@@ -362,7 +362,7 @@ def mobilenet0_25(**kwargs):
 def mobilenet_v2_0_25(**kwargs):
     r"""MobileNetV2 model from the
     `"Inverted Residuals and Linear Bottlenecks:
-      Mobile Networks for Classification, Detection and Segmentation"
+    Mobile Networks for Classification, Detection and Segmentation"
     <https://arxiv.org/abs/1801.04381>`_ paper.
 
     Parameters
diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py
index 3d18a596d4f4..fbc42e3614d3 100644
--- a/python/mxnet/ndarray/sparse.py
+++ b/python/mxnet/ndarray/sparse.py
@@ -420,7 +420,7 @@ def __setitem__(self, key, value):
         if isinstance(key, py_slice):
             if key.step is not None or key.start is not None or key.stop is not None:
                 raise ValueError('Assignment with slice for CSRNDArray is not ' \
-                                 'implmented yet.')
+                                 'implemented yet.')
             if isinstance(value, NDArray):
                 # avoid copying to itself
                 if value.handle is not self.handle:
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index bc03497fc99f..1cdc78e355c2 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -692,20 +692,19 @@ class LBSGD(Optimizer):
     Parameters
     ----------
     momentum : float, optional
-       The momentum value.
+        The momentum value.
     multi_precision: bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients
-                in 32-bit precision even if actual weights used in the model have lower precision.`<
-                Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.
+        ``False`` results in using the same precision as the weights (default),
+        ``True`` makes internal 32-bit copy of the weights and applies gradients
+        in 32-bit precision even if actual weights used in the model have lower precision.
+        Turning this on can improve convergence and accuracy when training with float16.
     warmup_strategy: string ('linear', 'power2', 'sqrt'. , 'lars'   default : 'linear')
     warmup_epochs: unsigned, default: 5
     batch_scale:   unsigned, default: 1 (same as batch size*numworkers)
     updates_per_epoch: updates_per_epoch (default: 32, Default might not reflect true number batches per epoch. Used for warmup.)
     begin_epoch: unsigned, default 0, starting epoch.
     """
-
     def __init__(self, momentum=0.0, multi_precision=False, warmup_strategy='linear',
                  warmup_epochs=5, batch_scale=1, updates_per_epoch=32, begin_epoch=0, num_epochs=60,
                  **kwargs):
diff --git a/python/mxnet/recordio.py b/python/mxnet/recordio.py
index 6fc4d8e7bf57..8415d67c3f91 100644
--- a/python/mxnet/recordio.py
+++ b/python/mxnet/recordio.py
@@ -37,7 +37,7 @@ class MXRecordIO(object):
     """Reads/writes `RecordIO` data format, supporting sequential read and write.
 
     Example usage:
-    ----------
+    --------------
     >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
     <mxnet.recordio.MXRecordIO object at 0x10ef40ed0>
     >>> for i in range(5):
@@ -125,7 +125,7 @@ def reset(self):
         If the record is opened with 'w', this function will truncate the file to empty.
 
         Example usage:
-        ----------
+        --------------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(2):
         ...    item = record.read()
@@ -144,7 +144,7 @@ def write(self, buf):
         """Inserts a string buffer as a record.
 
         Example usage:
-        ----------
+        --------------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
         >>> for i in range(5):
         ...    record.write('record_%d'%i)
@@ -164,7 +164,7 @@ def read(self):
         """Returns record as a string.
 
         Example usage:
-        ----------
+        --------------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(5):
         ...    item = record.read()
@@ -197,7 +197,7 @@ class MXIndexedRecordIO(MXRecordIO):
     """Reads/writes `RecordIO` data format, supporting random access.
 
     Example usage:
-    ----------
+    --------------
     >>> for i in range(5):
     ...     record.write_idx(i, 'record_%d'%i)
     >>> record.close()
@@ -262,7 +262,7 @@ def tell(self):
         """Returns the current position of write head.
 
         Example usage:
-        ----------
+        --------------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> print(record.tell())
         0
@@ -284,7 +284,7 @@ def read_idx(self, idx):
         """Returns the record at given index.
 
         Example usage:
-        ----------
+        --------------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> for i in range(5):
         ...     record.write_idx(i, 'record_%d'%i)
@@ -300,7 +300,7 @@ def write_idx(self, idx, buf):
         """Inserts input record at given index.
 
         Example usage:
-        ----------
+        --------------
         >>> for i in range(5):
         ...     record.write_idx(i, 'record_%d'%i)
         >>> record.close()
diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py
index a0eb253cc7eb..2be2b5d2ab31 100644
--- a/python/mxnet/visualization.py
+++ b/python/mxnet/visualization.py
@@ -213,12 +213,12 @@ def plot_network(symbol, title="plot", save_format='pdf', shape=None, node_attrs
         input symbol names (str) to the corresponding tensor shape (tuple).
     node_attrs: dict, optional
         Specifies the attributes for nodes in the generated visualization. `node_attrs` is
-        a dictionary of Graphviz attribute names and values. For example,
+        a dictionary of Graphviz attribute names and values. For example::
             ``node_attrs={"shape":"oval","fixedsize":"false"}``
-            will use oval shape for nodes and allow variable sized nodes in the visualization.
+        will use oval shape for nodes and allow variable sized nodes in the visualization.
     hide_weights: bool, optional
-        If True (default), then inputs with names of form *_weight (corresponding to weight
-        tensors) or *_bias (corresponding to bias vectors) will be hidden for a cleaner
+        If True (default), then inputs with names of form \*_weight (corresponding to weight
+        tensors) or \*_bias (corresponding to bias vectors) will be hidden for a cleaner
         visualization.
 
     Returns
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index 77d9bf06e2d1..0faa668caf97 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -396,9 +396,9 @@ The storage type of ``slice`` output depends on storage types of inputs
 - otherwise, ``slice`` generates output with default storage
 
 .. note:: When input data storage type is csr, it only supports
-step=(), or step=(None,), or step=(1,) to generate a csr output.
-For other step parameter values, it falls back to slicing
-a dense tensor.
+   step=(), or step=(None,), or step=(1,) to generate a csr output.
+   For other step parameter values, it falls back to slicing
+   a dense tensor.
 
 Example::
 

From 86fb22022a113a5c937c7f0ab2dfb3d374f5ccf5 Mon Sep 17 00:00:00 2001
From: Roshani Nagmote <roshaninagmote2@gmail.com>
Date: Thu, 8 Nov 2018 11:09:15 -0800
Subject: [PATCH 2/4] addressing feedback

---
 python/mxnet/recordio.py      | 32 ++++++++++++++++----------------
 python/mxnet/visualization.py |  8 +++++---
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/python/mxnet/recordio.py b/python/mxnet/recordio.py
index 8415d67c3f91..19962a8d91e1 100644
--- a/python/mxnet/recordio.py
+++ b/python/mxnet/recordio.py
@@ -36,8 +36,8 @@
 class MXRecordIO(object):
     """Reads/writes `RecordIO` data format, supporting sequential read and write.
 
-    Example usage:
-    --------------
+    Examples:
+    ---------
     >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
     <mxnet.recordio.MXRecordIO object at 0x10ef40ed0>
     >>> for i in range(5):
@@ -124,8 +124,8 @@ def reset(self):
 
         If the record is opened with 'w', this function will truncate the file to empty.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(2):
         ...    item = record.read()
@@ -143,8 +143,8 @@ def reset(self):
     def write(self, buf):
         """Inserts a string buffer as a record.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
         >>> for i in range(5):
         ...    record.write('record_%d'%i)
@@ -163,8 +163,8 @@ def write(self, buf):
     def read(self):
         """Returns record as a string.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(5):
         ...    item = record.read()
@@ -196,8 +196,8 @@ def read(self):
 class MXIndexedRecordIO(MXRecordIO):
     """Reads/writes `RecordIO` data format, supporting random access.
 
-    Example usage:
-    --------------
+    Examples:
+    ---------
     >>> for i in range(5):
     ...     record.write_idx(i, 'record_%d'%i)
     >>> record.close()
@@ -261,8 +261,8 @@ def seek(self, idx):
     def tell(self):
         """Returns the current position of write head.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> print(record.tell())
         0
@@ -283,8 +283,8 @@ def tell(self):
     def read_idx(self, idx):
         """Returns the record at given index.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> for i in range(5):
         ...     record.write_idx(i, 'record_%d'%i)
@@ -299,8 +299,8 @@ def read_idx(self, idx):
     def write_idx(self, idx, buf):
         """Inserts input record at given index.
 
-        Example usage:
-        --------------
+        Examples:
+        ---------
         >>> for i in range(5):
         ...     record.write_idx(i, 'record_%d'%i)
         >>> record.close()
diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py
index 2be2b5d2ab31..9297edee810b 100644
--- a/python/mxnet/visualization.py
+++ b/python/mxnet/visualization.py
@@ -214,11 +214,13 @@ def plot_network(symbol, title="plot", save_format='pdf', shape=None, node_attrs
     node_attrs: dict, optional
         Specifies the attributes for nodes in the generated visualization. `node_attrs` is
         a dictionary of Graphviz attribute names and values. For example::
-            ``node_attrs={"shape":"oval","fixedsize":"false"}``
+
+            node_attrs={"shape":"oval","fixedsize":"false"}
+
         will use oval shape for nodes and allow variable sized nodes in the visualization.
     hide_weights: bool, optional
-        If True (default), then inputs with names of form \*_weight (corresponding to weight
-        tensors) or \*_bias (corresponding to bias vectors) will be hidden for a cleaner
+        If True (default), then inputs with names of form ``*_weight`` (corresponding to weight
+        tensors) or ``*_bias`` (corresponding to bias vectors) will be hidden for a cleaner
         visualization.
 
     Returns

From 4e74b3d74b57520e33221f658558a3c5941d579f Mon Sep 17 00:00:00 2001
From: Roshani Nagmote <roshaninagmote2@gmail.com>
Date: Thu, 8 Nov 2018 14:14:59 -0800
Subject: [PATCH 3/4] base_module fix

---
 python/mxnet/module/base_module.py |  4 ++--
 python/mxnet/recordio.py           | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py
index c534261eacc2..babea53d6e40 100644
--- a/python/mxnet/module/base_module.py
+++ b/python/mxnet/module/base_module.py
@@ -279,8 +279,8 @@ def score(self, eval_data, eval_metric, num_batch=None, batch_end_callback=None,
     def iter_predict(self, eval_data, num_batch=None, reset=True, sparse_row_id_fn=None):
         """Iterates over predictions.
 
-        Example Usage:
-        ----------
+        Examples
+        --------
         >>> for pred, i_batch, batch in module.iter_predict(eval_data):
         ...     # pred is a list of outputs from the module
         ...     # i_batch is a integer
diff --git a/python/mxnet/recordio.py b/python/mxnet/recordio.py
index 19962a8d91e1..2def141c9340 100644
--- a/python/mxnet/recordio.py
+++ b/python/mxnet/recordio.py
@@ -36,7 +36,7 @@
 class MXRecordIO(object):
     """Reads/writes `RecordIO` data format, supporting sequential read and write.
 
-    Examples:
+    Examples
     ---------
     >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
     <mxnet.recordio.MXRecordIO object at 0x10ef40ed0>
@@ -124,7 +124,7 @@ def reset(self):
 
         If the record is opened with 'w', this function will truncate the file to empty.
 
-        Examples:
+        Examples
         ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(2):
@@ -143,7 +143,7 @@ def reset(self):
     def write(self, buf):
         """Inserts a string buffer as a record.
 
-        Examples:
+        Examples
         ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'w')
         >>> for i in range(5):
@@ -163,7 +163,7 @@ def write(self, buf):
     def read(self):
         """Returns record as a string.
 
-        Examples:
+        Examples
         ---------
         >>> record = mx.recordio.MXRecordIO('tmp.rec', 'r')
         >>> for i in range(5):
@@ -196,7 +196,7 @@ def read(self):
 class MXIndexedRecordIO(MXRecordIO):
     """Reads/writes `RecordIO` data format, supporting random access.
 
-    Examples:
+    Examples
     ---------
     >>> for i in range(5):
     ...     record.write_idx(i, 'record_%d'%i)
@@ -261,7 +261,7 @@ def seek(self, idx):
     def tell(self):
         """Returns the current position of write head.
 
-        Examples:
+        Examples
         ---------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> print(record.tell())
@@ -283,7 +283,7 @@ def tell(self):
     def read_idx(self, idx):
         """Returns the record at given index.
 
-        Examples:
+        Examples
         ---------
         >>> record = mx.recordio.MXIndexedRecordIO('tmp.idx', 'tmp.rec', 'w')
         >>> for i in range(5):
@@ -299,7 +299,7 @@ def read_idx(self, idx):
     def write_idx(self, idx, buf):
         """Inserts input record at given index.
 
-        Examples:
+        Examples
         ---------
         >>> for i in range(5):
         ...     record.write_idx(i, 'record_%d'%i)

From dabb87b387f94e7762f7b4c7986367893c41fd10 Mon Sep 17 00:00:00 2001
From: Roshani Nagmote <roshaninagmote2@gmail.com>
Date: Fri, 9 Nov 2018 16:59:58 -0800
Subject: [PATCH 4/4] fixing cross-reference issues

---
 python/mxnet/ndarray/ndarray.py     | 68 ++++++++++++++---------------
 python/mxnet/ndarray/sparse.py      | 16 +++----
 python/mxnet/optimizer/optimizer.py | 59 ++++++++++++++-----------
 python/mxnet/test_utils.py          | 12 +++--
 4 files changed, 83 insertions(+), 72 deletions(-)

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index bf1140d2071b..112fd56af676 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -399,7 +399,7 @@ def __setitem__(self, key, value):
 
         Parameters
         ----------
-        key : int, slice, list, np.ndarray, NDArray, or tuple of all previous types
+        key : int, mxnet.ndarray.slice, list, np.ndarray, NDArray, or tuple of all previous types
             The indexing key.
         value : scalar or array-like object that can be broadcast to the shape of self[key]
             The value to set.
@@ -467,7 +467,7 @@ def __getitem__(self, key):
 
         Parameters
         ----------
-        key : int, slice, list, np.ndarray, NDArray, or tuple of all previous types
+        key : int, mxnet.ndarray.slice, list, np.ndarray, NDArray, or tuple of all previous types
             Indexing key.
 
         Examples
@@ -2642,9 +2642,9 @@ def add(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be added.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be added.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2704,9 +2704,9 @@ def subtract(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be subtracted.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be subtracted.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2765,9 +2765,9 @@ def multiply(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be multiplied.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be multiplied.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2826,9 +2826,9 @@ def divide(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array in division.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array in division.
         The arrays to be divided. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2883,9 +2883,9 @@ def modulo(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array in modulo.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array in modulo.
         The arrays to be taken modulo. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3002,9 +3002,9 @@ def maximum(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3059,9 +3059,9 @@ def minimum(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3120,9 +3120,9 @@ def equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3184,9 +3184,9 @@ def not_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3251,9 +3251,9 @@ def greater(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3315,9 +3315,9 @@ def greater_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3379,9 +3379,9 @@ def lesser(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3443,9 +3443,9 @@ def lesser_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second array to be compared. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3506,9 +3506,9 @@ def logical_and(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second input of the function. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3566,9 +3566,9 @@ def logical_or(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second input of the function. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
@@ -3626,9 +3626,9 @@ def logical_xor(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
          Second input of the function. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
 
diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py
index fbc42e3614d3..1e69eac7f702 100644
--- a/python/mxnet/ndarray/sparse.py
+++ b/python/mxnet/ndarray/sparse.py
@@ -1205,9 +1205,9 @@ def add(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be added.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
          Second array to be added.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -1277,9 +1277,9 @@ def subtract(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be subtracted.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
          Second array to be subtracted.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.__spec__
@@ -1348,9 +1348,9 @@ def multiply(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be multiplied.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
          Second array to be multiplied.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -1432,9 +1432,9 @@ def divide(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array in division.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
          Second array in division.
         The arrays to be divided. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 1cdc78e355c2..d632a8c7c640 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -70,11 +70,12 @@ class Optimizer(object):
         The initial number of updates.
 
     multi_precision : bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients
-       in 32-bit precision even if actual weights used in the model have lower precision.
-       Turning this on can improve convergence and accuracy when training with float16.
+       Flag to control the internal precision of the optimizer::
+
+           False: results in using the same precision as the weights (default),
+           True: makes internal 32-bit copy of the weights and applies gradients
+           in 32-bit precision even if actual weights used in the model have lower precision.
+           Turning this on can improve convergence and accuracy when training with float16.
 
     Properties
     ----------
@@ -481,16 +482,17 @@ class SGD(Optimizer):
     Parameters
     ----------
     momentum : float, optional
-       The momentum value.
+        The momentum value.
     lazy_update : bool, optional
-       Default is True. If True, lazy updates are applied \
-       if the storage types of weight and grad are both ``row_sparse``.
+        Default is True. If True, lazy updates are applied \
+        if the storage types of weight and grad are both ``row_sparse``.
     multi_precision: bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients \
-                in 32-bit precision even if actual weights used in the model have lower precision.\
-                Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer::
+
+            False: results in using the same precision as the weights (default),
+            True: makes internal 32-bit copy of the weights and applies gradients
+            in 32-bit precision even if actual weights used in the model have lower precision.
+            Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, lazy_update=True, **kwargs):
         super(SGD, self).__init__(**kwargs)
@@ -694,11 +696,13 @@ class LBSGD(Optimizer):
     momentum : float, optional
         The momentum value.
     multi_precision: bool, optional
-        Flag to control the internal precision of the optimizer.
-        ``False`` results in using the same precision as the weights (default),
-        ``True`` makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer::
+
+            False: results in using the same precision as the weights (default),
+            True: makes an internal 32-bit copy of the weights and applies gradients
+            in 32-bit precision even if actual weights used in the model have lower precision.
+            Turning this on can improve convergence and accuracy when training with float16.
+
     warmup_strategy: string ('linear', 'power2', 'sqrt'. , 'lars'   default : 'linear')
     warmup_epochs: unsigned, default: 5
     batch_scale:   unsigned, default: 1 (same as batch size*numworkers)
@@ -933,11 +937,12 @@ class NAG(Optimizer):
     momentum : float, optional
        The momentum value.
     multi_precision: bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients \
-                in 32-bit precision even if actual weights used in the model have lower precision.\
-                Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer::
+
+            False: results in using the same precision as the weights (default),
+            True: makes an internal 32-bit copy of the weights and applies gradients
+            in 32-bit precision even if actual weights used in the model have lower precision.
+            Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, **kwargs):
         super(NAG, self).__init__(**kwargs)
@@ -1175,9 +1180,11 @@ class RMSProp(Optimizer):
     epsilon : float, optional
         Small value to avoid division by 0.
     centered : bool, optional
-        Flag to control which version of RMSProp to use.
-        ``True`` will use Graves's version of `RMSProp`,
-        ``False`` will use Tieleman & Hinton's version of `RMSProp`.
+        Flag to control which version of RMSProp to use::
+
+            True: will use Graves's version of RMSProp,
+            False: will use Tieleman & Hinton's version of RMSProp.
+
     clip_weights : float, optional
         Clips weights into range ``[-clip_weights, clip_weights]``.
     """
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index 7ac63c6c53d5..d23b563add96 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -261,10 +261,14 @@ def rand_sparse_ndarray(shape, stype, density=None, dtype=None, distribution=Non
     Parameters
     ----------
     shape: list or tuple
-    stype: str, valid values: "csr" or "row_sparse"
-    density, optional: float, should be between 0 and 1
-    distribution, optional: str, valid values: "uniform" or "powerlaw"
-    dtype, optional: numpy.dtype, default value is None
+    stype: str
+        valid values: "csr" or "row_sparse"
+    density: float, optional
+        should be between 0 and 1
+    distribution: str, optional
+        valid values: "uniform" or "powerlaw"
+    dtype: numpy.dtype, optional
+        default value is None
 
     Returns
     -------