From dabb87b387f94e7762f7b4c7986367893c41fd10 Mon Sep 17 00:00:00 2001
From: Roshani Nagmote
Date: Fri, 9 Nov 2018 16:59:58 -0800
Subject: [PATCH] fixing cross-reference issues

---
 python/mxnet/ndarray/ndarray.py     | 68 ++++++++++++++---------
 python/mxnet/ndarray/sparse.py      | 16 +++----
 python/mxnet/optimizer/optimizer.py | 59 ++++++++++++++-----------
 python/mxnet/test_utils.py          | 12 +++--
 4 files changed, 83 insertions(+), 72 deletions(-)

diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index bf1140d2071b..112fd56af676 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -399,7 +399,7 @@ def __setitem__(self, key, value):
 
         Parameters
         ----------
-        key : int, slice, list, np.ndarray, NDArray, or tuple of all previous types
+        key : int, mxnet.ndarray.slice, list, np.ndarray, NDArray, or tuple of all previous types
             The indexing key.
         value : scalar or array-like object that can be broadcast to the shape of self[key]
             The value to set.
@@ -467,7 +467,7 @@ def __getitem__(self, key):
 
         Parameters
         ----------
-        key : int, slice, list, np.ndarray, NDArray, or tuple of all previous types
+        key : int, mxnet.ndarray.slice, list, np.ndarray, NDArray, or tuple of all previous types
             Indexing key.
 
         Examples
@@ -2642,9 +2642,9 @@ def add(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be added.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be added.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2704,9 +2704,9 @@ def subtract(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be subtracted.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be subtracted.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2765,9 +2765,9 @@ def multiply(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be multiplied.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be multiplied.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2826,9 +2826,9 @@ def divide(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array in division.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array in division.
         The arrays to be divided. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -2883,9 +2883,9 @@ def modulo(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array in modulo.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array in modulo.
         The arrays to be taken modulo. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3002,9 +3002,9 @@ def maximum(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3059,9 +3059,9 @@ def minimum(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3120,9 +3120,9 @@ def equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3184,9 +3184,9 @@ def not_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3251,9 +3251,9 @@ def greater(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3315,9 +3315,9 @@ def greater_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3379,9 +3379,9 @@ def lesser(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3443,9 +3443,9 @@ def lesser_equal(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First array to be compared.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second array to be compared.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3506,9 +3506,9 @@ def logical_and(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second input of the function.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3566,9 +3566,9 @@ def logical_or(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second input of the function.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -3626,9 +3626,9 @@ def logical_xor(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.array
         First input of the function.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.array
         Second input of the function.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
diff --git a/python/mxnet/ndarray/sparse.py b/python/mxnet/ndarray/sparse.py
index fbc42e3614d3..1e69eac7f702 100644
--- a/python/mxnet/ndarray/sparse.py
+++ b/python/mxnet/ndarray/sparse.py
@@ -1205,9 +1205,9 @@ def add(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be added.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
         Second array to be added.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
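[Reviewer note] Every hunk above documents the same broadcasting contract: when
``lhs.shape != rhs.shape``, the operands are broadcast to a common shape before
the elementwise operation runs. A minimal sketch of that behavior, assuming only
that MXNet is installed and importable as ``mx``::

    import mxnet as mx

    # A (2, 3) array combined with a (1, 3) array: the second operand
    # is broadcast along axis 0 before the elementwise add.
    x = mx.nd.ones((2, 3))
    y = mx.nd.arange(3).reshape((1, 3))
    z = mx.nd.add(x, y)          # same result as x + y
    print(z.shape)               # (2, 3)

    # Either operand may also be a scalar, as the docstrings note.
    print(mx.nd.maximum(x, 2.0).asnumpy())

The same rule applies to subtract/multiply/divide/modulo, the comparison
operators, and the logical ops touched in this patch.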
@@ -1277,9 +1277,9 @@ def subtract(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be subtracted.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
         Second array to be subtracted.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -1348,9 +1348,9 @@ def multiply(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array to be multiplied.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
         Second array to be multiplied.
         If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
@@ -1432,9 +1432,9 @@ def divide(lhs, rhs):
 
     Parameters
     ----------
-    lhs : scalar or array
+    lhs : scalar or mxnet.ndarray.sparse.array
         First array in division.
-    rhs : scalar or array
+    rhs : scalar or mxnet.ndarray.sparse.array
         Second array in division.
         The arrays to be divided. If ``lhs.shape != rhs.shape``, they must be
         broadcastable to a common shape.
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 1cdc78e355c2..d632a8c7c640 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -70,11 +70,12 @@ class Optimizer(object):
         The initial number of updates.
 
     multi_precision : bool, optional
-        Flag to control the internal precision of the optimizer.
-        ``False`` results in using the same precision as the weights (default),
-        ``True`` makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.::
+
+            False: results in using the same precision as the weights (default),
+            True: makes internal 32-bit copy of the weights and applies gradients
+                  in 32-bit precision even if actual weights used in the model have lower precision.
+                  Turning this on can improve convergence and accuracy when training with float16.
 
     Properties
     ----------
@@ -481,16 +482,17 @@ class SGD(Optimizer):
     Parameters
     ----------
     momentum : float, optional
-       The momentum value.
+        The momentum value.
     lazy_update : bool, optional
-       Default is True. If True, lazy updates are applied \
-       if the storage types of weight and grad are both ``row_sparse``.
+        Default is True. If True, lazy updates are applied \
+        if the storage types of weight and grad are both ``row_sparse``.
     multi_precision: bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients \
-       in 32-bit precision even if actual weights used in the model have lower precision.\
-       Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.::
+
+            False: results in using the same precision as the weights (default),
+            True: makes internal 32-bit copy of the weights and applies gradients
+                  in 32-bit precision even if actual weights used in the model have lower precision.
+                  Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, lazy_update=True, **kwargs):
         super(SGD, self).__init__(**kwargs)
@@ -694,11 +696,13 @@ class LBSGD(Optimizer):
     momentum : float, optional
         The momentum value.
     multi_precision: bool, optional
-        Flag to control the internal precision of the optimizer.
-        ``False`` results in using the same precision as the weights (default),
-        ``True`` makes internal 32-bit copy of the weights and applies gradients
-        in 32-bit precision even if actual weights used in the model have lower precision.
-        Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.::
+
+            False: results in using the same precision as the weights (default),
+            True: makes internal 32-bit copy of the weights and applies gradients
+                  in 32-bit precision even if actual weights used in the model have lower precision.
+                  Turning this on can improve convergence and accuracy when training with float16.
+
     warmup_strategy: string ('linear', 'power2', 'sqrt'. , 'lars' default : 'linear')
     warmup_epochs: unsigned, default: 5
     batch_scale: unsigned, default: 1 (same as batch size*numworkers)
@@ -933,11 +937,12 @@ class NAG(Optimizer):
     momentum : float, optional
        The momentum value.
     multi_precision: bool, optional
-       Flag to control the internal precision of the optimizer.
-       ``False`` results in using the same precision as the weights (default),
-       ``True`` makes internal 32-bit copy of the weights and applies gradients \
-       in 32-bit precision even if actual weights used in the model have lower precision.\
-       Turning this on can improve convergence and accuracy when training with float16.
+        Flag to control the internal precision of the optimizer.::
+
+            False: results in using the same precision as the weights (default),
+            True: makes internal 32-bit copy of the weights and applies gradients
+                  in 32-bit precision even if actual weights used in the model have lower precision.
+                  Turning this on can improve convergence and accuracy when training with float16.
     """
     def __init__(self, momentum=0.0, **kwargs):
         super(NAG, self).__init__(**kwargs)
@@ -1175,9 +1180,11 @@ class RMSProp(Optimizer):
 
     epsilon : float, optional
         Small value to avoid division by 0.
     centered : bool, optional
-        Flag to control which version of RMSProp to use.
-        ``True`` will use Graves's version of `RMSProp`,
-        ``False`` will use Tieleman & Hinton's version of `RMSProp`.
+        Flag to control which version of RMSProp to use.::
+
+            True: will use Graves's version of `RMSProp`,
+            False: will use Tieleman & Hinton's version of `RMSProp`.
+
     clip_weights : float, optional
         Clips weights into range ``[-clip_weights, clip_weights]``.
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index 7ac63c6c53d5..d23b563add96 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -261,10 +261,14 @@ def rand_sparse_ndarray(shape, stype, density=None, dtype=None, distribution=Non
     Parameters
     ----------
     shape: list or tuple
-    stype: str, valid values: "csr" or "row_sparse"
-    density, optional: float, should be between 0 and 1
-    distribution, optional: str, valid values: "uniform" or "powerlaw"
-    dtype, optional: numpy.dtype, default value is None
+    stype: str
+        valid values: "csr" or "row_sparse"
+    density: float, optional
+        should be between 0 and 1
+    distribution: str, optional
+        valid values: "uniform" or "powerlaw"
+    dtype: numpy.dtype, optional
+        default value is None
 
     Returns
     -------
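[Reviewer note] As a quick sanity check that the reworded docstrings still match
the real call signatures, a small usage sketch; it assumes MXNet is installed,
and the printed values will vary because the sparse array is random::

    import mxnet as mx
    from mxnet.test_utils import rand_sparse_ndarray

    # multi_precision=True keeps a 32-bit master copy of the weights and
    # applies gradients in float32, which helps float16 training converge.
    sgd = mx.optimizer.SGD(learning_rate=0.1, momentum=0.9,
                           multi_precision=True)

    # centered=True selects Graves's variant of RMSProp; the default
    # (False) is Tieleman & Hinton's version.
    rmsprop = mx.optimizer.RMSProp(learning_rate=0.001, centered=True)

    # rand_sparse_ndarray returns the sparse NDArray plus its raw
    # components; only the array itself is used here.
    arr, _ = rand_sparse_ndarray(shape=(4, 5), stype='csr',
                                 density=0.3, distribution='uniform')
    print(arr.stype)             # 'csr'
    print(arr.asnumpy())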