From ad0db62c5e6488a4a101f4ef64329269ee9dfb2f Mon Sep 17 00:00:00 2001
From: Haibin Lin <linhaibin.eric@gmail.com>
Date: Mon, 12 Aug 2019 16:36:28 -0700
Subject: [PATCH 1/2] Update basic_layers.py

---
 python/mxnet/gluon/nn/basic_layers.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index b1482ce6dd82..0c42077a3d7b 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -150,8 +150,9 @@ class Dense(HybridBlock):
     created by the layer, and `bias` is a bias vector created by the layer
     (only applicable if `use_bias` is `True`).
 
-    Note: the input must be a tensor with rank 2. Use `flatten` to convert it
-    to rank 2 manually if necessary.
+    .. note::
+        The input must be a tensor with rank 2. Use `flatten` to convert it
+        to rank 2 manually if necessary.
 
     Parameters
     ----------
@@ -374,11 +375,13 @@ class Embedding(HybridBlock):
     r"""Turns non-negative integers (indexes/tokens) into dense vectors
     of fixed size. eg. [4, 20] -> [[0.25, 0.1], [0.6, -0.2]]
 
-    Note: if `sparse_grad` is set to True, the gradient w.r.t weight will be
-    sparse. Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
-    and Adam. By default lazy updates is turned on, which may perform differently
-    from standard updates. For more details, please check the Optimization API at:
-    https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
+    .. note::
+        If `sparse_grad` is set to True, the gradient w.r.t. the weight will be
+        sparse. Only a subset of optimizers support sparse gradients, including SGD,
+        AdaGrad and Adam. By default, lazy updates are turned on, which may perform
+        differently from standard updates. For more details, please check the
+        Optimization API at:
+        https://mxnet.incubator.apache.org/api/python/optimization/optimization.html
 
     Parameters
     ----------

From a8115630e79120e1fb83b9bb9e5e3c3ff4e05c72 Mon Sep 17 00:00:00 2001
From: Haibin Lin <linhaibin.eric@gmail.com>
Date: Mon, 12 Aug 2019 16:38:49 -0700
Subject: [PATCH 2/2] Update indexing_op.cc

---
 src/operator/tensor/indexing_op.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/operator/tensor/indexing_op.cc b/src/operator/tensor/indexing_op.cc
index ad4e54db54f1..8a5030ef235b 100644
--- a/src/operator/tensor/indexing_op.cc
+++ b/src/operator/tensor/indexing_op.cc
@@ -481,8 +481,9 @@ All the input values should be integers in the range [0, input_dim).
 If the input_dim is ip0 and output_dim is op0, then shape of the embedding weight matrix must be
 (ip0, op0).
 
-By default, if any index mentioned is too large, it is replaced by the index that addresses
-the last vector in an embedding matrix.
+When "sparse_grad" is False, if any index mentioned is too large, it is replaced by the index that
+addresses the last vector in an embedding matrix.
+When "sparse_grad" is True, an error will be raised if invalid indices are found.
 
 Examples::