diff --git a/python/mxnet/autograd.py b/python/mxnet/autograd.py
index b3acee27caab..afc67e2569bd 100644
--- a/python/mxnet/autograd.py
+++ b/python/mxnet/autograd.py
@@ -273,8 +273,10 @@ def grad(heads, variables, head_grads=None, retain_graph=None, create_graph=Fals
     returned as new NDArrays instead of stored into `variable.grad`.
     Supports recording gradient graph for computing higher order gradients.
 
-    .. Note: Currently only a very limited set of operators support higher order
-    gradients.
+    .. note::
+
+        Currently only a very limited set of operators support higher order \
+        gradients.
 
     Parameters
     ----------
diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py
index 6cb9fc690b5a..7047364966af 100644
--- a/python/mxnet/gluon/block.py
+++ b/python/mxnet/gluon/block.py
@@ -326,8 +326,7 @@ def save_parameters(self, filename):
 
         References
         ----------
-        `Saving and Loading Gluon Models
-
+        `Saving and Loading Gluon Models \
         `_
         """
         params = self._collect_params_with_prefix()
@@ -372,8 +371,7 @@ def load_parameters(self, filename, ctx=None, allow_missing=False,
 
         References
         ----------
-        `Saving and Loading Gluon Models
-
+        `Saving and Loading Gluon Models \
         `_
         """
         loaded = ndarray.load(filename)
diff --git a/python/mxnet/gluon/contrib/nn/basic_layers.py b/python/mxnet/gluon/contrib/nn/basic_layers.py
index c656cd2d4e1d..28fea1592da4 100644
--- a/python/mxnet/gluon/contrib/nn/basic_layers.py
+++ b/python/mxnet/gluon/contrib/nn/basic_layers.py
@@ -27,7 +27,7 @@
 from ...nn import Sequential, HybridSequential, BatchNorm
 
 class Concurrent(Sequential):
-    """Lays `Block`s concurrently.
+    """Lays `Block` s concurrently.
 
     This block feeds its input to all children blocks, and produce the output
     by concatenating all the children blocks' outputs
@@ -60,7 +60,7 @@ def forward(self, x):
 
 
 class HybridConcurrent(HybridSequential):
-    """Lays `HybridBlock`s concurrently.
+    """Lays `HybridBlock` s concurrently.
 
     This block feeds its input to all children blocks, and produce the output
     by concatenating all the children blocks' outputs
diff --git a/python/mxnet/gluon/contrib/rnn/rnn_cell.py b/python/mxnet/gluon/contrib/rnn/rnn_cell.py
index 1b9afee14bf2..0cbc9eaac375 100644
--- a/python/mxnet/gluon/contrib/rnn/rnn_cell.py
+++ b/python/mxnet/gluon/contrib/rnn/rnn_cell.py
@@ -26,7 +26,7 @@
 class VariationalDropoutCell(ModifierCell):
     """
     Applies Variational Dropout on base cell.
-    (https://arxiv.org/pdf/1512.05287.pdf,
+    (https://arxiv.org/pdf/1512.05287.pdf, \
     https://www.stat.berkeley.edu/~tsmoon/files/Conference/asru2015.pdf).
 
     Variational dropout uses the same dropout mask across time-steps. It can be applied to RNN
@@ -197,24 +197,29 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N
 class LSTMPCell(HybridRecurrentCell):
     r"""Long-Short Term Memory Projected (LSTMP) network cell.
     (https://arxiv.org/abs/1402.1128)
+
     Each call computes the following function:
+
     .. math::
         \begin{array}{ll}
         i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{ri} r_{(t-1)} + b_{ri}) \\
         f_t = sigmoid(W_{if} x_t + b_{if} + W_{rf} r_{(t-1)} + b_{rf}) \\
-        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{rc} r_{(t-1)} + b_{rg}}) \\
+        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{rc} r_{(t-1)} + b_{rg}) \\
         o_t = sigmoid(W_{io} x_t + b_{io} + W_{ro} r_{(t-1)} + b_{ro}) \\
         c_t = f_t * c_{(t-1)} + i_t * g_t \\
         h_t = o_t * \tanh(c_t) \\
         r_t = W_{hr} h_t
         \end{array}
+
     where :math:`r_t` is the projected recurrent activation at time `t`,
-    math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
+    :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
     cell state at time `t`, :math:`x_t` is the input at time `t`, and :math:`i_t`,
     :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, and
     out gates, respectively.
+
     Parameters
     ----------
+
     hidden_size : int
         Number of units in cell state symbol.
     projection_size : int
@@ -234,7 +239,7 @@ class LSTMPCell(HybridRecurrentCell):
         to zero.
     h2h_bias_initializer : str or Initializer
         Initializer for the bias vector.
-    prefix : str, default 'lstmp_'
+    prefix : str, default ``'lstmp_``'
         Prefix for name of `Block`s
         (and name of weight if params is `None`).
     params : Parameter or None
diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py
index 2c98000389ad..0e380c06efa0 100644
--- a/python/mxnet/gluon/data/vision/datasets.py
+++ b/python/mxnet/gluon/data/vision/datasets.py
@@ -45,8 +45,7 @@ class MNIST(dataset._DownloadedDataset):
     train : bool, default True
         Whether to load the training or testing set.
     transform : function, default None
-        A user defined callback that transforms each sample. For example:
-        ::
+        A user defined callback that transforms each sample. For example::
 
             transform=lambda data, label: (data.astype(np.float32)/255, label)
 
@@ -106,8 +105,7 @@ class FashionMNIST(MNIST):
     train : bool, default True
         Whether to load the training or testing set.
     transform : function, default None
-        A user defined callback that transforms each sample. For example:
-        ::
+        A user defined callback that transforms each sample. For example::
 
             transform=lambda data, label: (data.astype(np.float32)/255, label)
 
@@ -139,8 +137,7 @@ class CIFAR10(dataset._DownloadedDataset):
     train : bool, default True
         Whether to load the training or testing set.
     transform : function, default None
-        A user defined callback that transforms each sample. For example:
-        ::
+        A user defined callback that transforms each sample. For example::
 
            transform=lambda data, label: (data.astype(np.float32)/255, label)
 
@@ -204,8 +201,7 @@ class CIFAR100(CIFAR10):
     train : bool, default True
         Whether to load the training or testing set.
     transform : function, default None
-        A user defined callback that transforms each sample. For example:
-        ::
+        A user defined callback that transforms each sample. For example::
 
            transform=lambda data, label: (data.astype(np.float32)/255, label)
 
@@ -242,8 +238,7 @@ class ImageRecordDataset(dataset.RecordFileDataset):
         If 1, always convert images to colored (RGB).
 
     transform : function, default None
-        A user defined callback that transforms each sample. For example:
-        ::
+        A user defined callback that transforms each sample. For example::
 
            transform=lambda data, label: (data.astype(np.float32)/255, label)
 
@@ -279,8 +274,7 @@ class ImageFolderDataset(dataset.Dataset):
         If 0, always convert loaded images to greyscale (1 channel).
         If 1, always convert loaded images to colored (3 channels).
     transform : callable, default None
-        A function that takes data and label and transforms them:
-        ::
+        A function that takes data and label and transforms them::
 
            transform = lambda data, label: (data.astype(np.float32)/255, label)
 
diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py
index da43b62a1c34..7b5832e1ace6 100644
--- a/python/mxnet/gluon/loss.py
+++ b/python/mxnet/gluon/loss.py
@@ -621,7 +621,7 @@ class LogisticLoss(Loss):
 
     where `pred` is the classifier prediction and `label` is the target tensor
     containing values -1 or 1 (0 or 1 if `label_format` is binary).
-    `pred` and `label` can have arbitrary shape as long as they have the same number of elements.
+    `pred` and `label` can have arbitrary shape as long as they have the same number of elements.
 
     Parameters
     ----------
diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py
index c95601c6f796..0aa60263a9fa 100644
--- a/python/mxnet/gluon/nn/basic_layers.py
+++ b/python/mxnet/gluon/nn/basic_layers.py
@@ -75,7 +75,7 @@ def __len__(self):
         return len(self._children)
 
     def hybridize(self, active=True, **kwargs):
-        """Activates or deactivates `HybridBlock`s recursively. Has no effect on
+        """Activates or deactivates `HybridBlock` s recursively. Has no effect on
         non-hybrid children.
 
         Parameters
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index b57defa54988..b3d8f80318ba 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -580,7 +580,7 @@ class Constant(Parameter):
     will not change during training. But you can still update their values
     manually with the `set_data` method.
 
-    `Constant`s can be created with either::
+    `Constant` s can be created with either::
 
         const = mx.gluon.Constant('const', [[1,2],[3,4]])
 
diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py
index d23b563add96..bac7b2267db7 100644
--- a/python/mxnet/test_utils.py
+++ b/python/mxnet/test_utils.py
@@ -805,10 +805,12 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
     location : list or tuple or dict
         Argument values used as location to compute gradient
 
-        - if type is list of numpy.ndarray
+        - if type is list of numpy.ndarray, \
           inner elements should have the same order as mxnet.sym.list_arguments().
-        - if type is dict of str -> numpy.ndarray
+
+        - if type is dict of str -> numpy.ndarray, \
          maps the name of arguments to the corresponding numpy.ndarray.
+
        *In either case, value of all the arguments must be provided.*
     aux_states : list or tuple or dict, optional
         The auxiliary states required when generating the executor for the symbol.
@@ -829,7 +831,7 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
 
     References
     ---------
-    ..[1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
+    [1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
     """
     assert dtype in (np.float16, np.float32, np.float64)
     # cannot use finite differences with small eps without high precision
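Note on the `autograd.grad` hunk at the top of this patch: the docstring it reformats describes returning gradients as new NDArrays and recording the gradient graph for higher-order gradients. A minimal usage sketch of that API follows (illustrative only, not part of the patch; it assumes the operator used, here `sin`, is among the limited set of operators with second-order support):

    from mxnet import autograd, nd

    x = nd.array([0.1, 0.5, 1.0])
    x.attach_grad()
    with autograd.record():
        y = nd.sin(x)
        # create_graph=True records the gradient computation itself, so the
        # first-order gradient below can be differentiated once more.
        dy_dx = autograd.grad(y, [x], create_graph=True, retain_graph=True)[0]
    # Back-propagating through the recorded gradient graph yields the second
    # derivative of sin(x), i.e. -sin(x), which is stored into x.grad.
    dy_dx.backward()
    print(dy_dx.asnumpy())   # cos(x)
    print(x.grad.asnumpy())  # -sin(x)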