diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 37f48f70c336..dc6089bd5e3e 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -32,8 +32,8 @@ 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', - 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', - 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax'] + 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', 'mean', + 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var'] @set_module('mxnet.ndarray.numpy') @@ -2145,3 +2145,196 @@ def argmax(a, axis=None, out=None): array([2., 2.]) """ return _npi.argmax(a, axis=axis, keepdims=False, out=out) + + +@set_module('mxnet.ndarray.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + Parameters + ---------- + a : ndarray + ndarray containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : ndarray, optional + Alternate output array in which to place the result. 
The default is None; if provided, + it must have the same shape and type as the expected output + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of ndarray, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + Returns + ------- + m : ndarray, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + - only ndarray is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.ndarray.numpy') +def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=too-many-arguments + """ + Compute the standard deviation along the specified axis. + Returns the standard deviation, a measure of the spread of a distribution, + of the array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of these values. + axis : None or int or tuple of ints, optional + Axis or axes along which the standard deviation is computed. 
The + default is to compute the standard deviation of the flattened array. + .. versionadded:: 1.7.0 + If this is a tuple of ints, a standard deviation is performed over + multiple axes, instead of a single axis or all the axes as before. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + If the default value is passed, then `keepdims` will not be + passed through to the `std` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. 
+ + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.std(a) + 1.1180339887498949 # may vary + >>> np.std(a, axis=0) + array([1., 1.]) + >>> np.std(a, axis=1) + array([0.5, 0.5]) + In single precision, std() can be inaccurate: + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.std(a) + array(0.45) + >>> np.std(a, dtype=np.float64) + array(0.45, dtype=float64) + """ + return _npi.std(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) + + +@set_module('mxnet.ndarray.numpy') +def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=too-many-arguments + """ + Compute the variance along the specified axis. + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which the variance is computed. The default is to + compute the variance of the flattened array. + .. versionadded:: 1.7.0 + If this is a tuple of ints, a variance is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. 
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + If the default value is passed, then `keepdims` will not be + passed through to the `var` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.var(a) + array(1.25) + >>> np.var(a, axis=0) + array([1., 1.]) + >>> np.var(a, axis=1) + array([0.25, 0.25]) + + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.var(a) + array(0.2025) + >>> np.var(a, dtype=np.float64) + array(0.2025, dtype=float64) + >>> ((1-0.55)**2 + (0.1-0.55)**2)/2 + 0.2025 + """ + return _npi.var(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 83688774f069..d59eddc3d139 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -52,7 +52,7 @@ 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', - 'stack', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax'] + 'stack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var'] # Return code for dispatching indexing function call _NDARRAY_UNSUPPORTED_INDEXING = -1 @@ -1172,11 +1172,9 @@ def mean(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disa """Returns the average 
of the array elements along given axis.""" raise NotImplementedError - # TODO(junwu): Use mxnet std op instead of onp.std def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=arguments-differ """Returns the standard deviation of the array elements along given axis.""" - ret_np = self.asnumpy().std(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) - return array(ret_np, dtype=ret_np.dtype, ctx=self.context) + return _mx_np_op.std(self, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) def cumsum(self, axis=None, dtype=None, out=None): """Return the cumulative sum of the elements along the given axis.""" @@ -3588,3 +3586,196 @@ def argmax(a, axis=None, out=None): array([2., 2.]) """ return _mx_nd_np.argmax(a, axis, out) + + +@set_module('mxnet.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + Parameters + ---------- + a : ndarray + ndarray containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default is None; if provided, + it must have the same shape and type as the expected output. 
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of ndarray, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + Returns + ------- + m : ndarray, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + - only ndarray is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.numpy') +def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=None): + """ + Compute the standard deviation along the specified axis. + Returns the standard deviation, a measure of the spread of a distribution, + of the array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of these values. + axis : None or int or tuple of ints, optional + Axis or axes along which the standard deviation is computed. The + default is to compute the standard deviation of the flattened array. + .. 
versionadded:: 1.7.0 + If this is a tuple of ints, a standard deviation is performed over + multiple axes, instead of a single axis or all the axes as before. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + If the default value is passed, then `keepdims` will not be + passed through to the `std` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. 
+ + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.std(a) + 1.1180339887498949 # may vary + >>> np.std(a, axis=0) + array([1., 1.]) + >>> np.std(a, axis=1) + array([0.5, 0.5]) + In single precision, std() can be inaccurate: + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.std(a) + array(0.45) + >>> np.std(a, dtype=np.float64) + array(0.45, dtype=float64) + """ + return _npi.std(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) + + +@set_module('mxnet.numpy') +def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=None): + """ + Compute the variance along the specified axis. + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which the variance is computed. The default is to + compute the variance of the flattened array. + .. versionadded:: 1.7.0 + If this is a tuple of ints, a variance is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float32`; for arrays of float types it is the same as + the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of elements. By + default `ddof` is zero. 
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + If the default value is passed, then `keepdims` will not be + passed through to the `var` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.var(a) + array(1.25) + >>> np.var(a, axis=0) + array([1., 1.]) + >>> np.var(a, axis=1) + array([0.25, 0.25]) + + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0, :] = 1.0 + >>> a[1, :] = 0.1 + >>> np.var(a) + array(0.2025) + >>> np.var(a, dtype=np.float64) + array(0.2025, dtype=float64) + >>> ((1-0.55)**2 + (0.1-0.55)**2)/2 + 0.2025 + """ + return _npi.var(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 46053b3e0798..e752c5801bb8 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -34,8 +34,8 @@ 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', - 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', - 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax'] + 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', 'mean', + 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var'] def _num_outputs(sym): @@ -535,7 +535,7 @@ def mean(self, axis=None, 
dtype=None, out=None, keepdims=False): # pylint: disa The arguments are the same as for :py:func:`mean`, with this array as data. """ - raise NotImplementedError + return _npi.mean(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out) def cumsum(self, axis=None, dtype=None, out=None): """Return the cumulative sum of the elements along the given axis.""" @@ -2508,4 +2508,165 @@ def argmax(a, axis=None, out=None): return _npi.argmax(a, axis=axis, keepdims=False, out=out) +@set_module('mxnet.symbol.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : `_Symbol` + _Symbol containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : _Symbol, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of _Symbol, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. 
+ + Returns + ------- + m : _Symbol, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + + - only _Symbol is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.symbol.numpy') +def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=too-many-arguments + """ + Compute the standard deviation along the specified axis. + + Returns the standard deviation, a measure of the spread of a distribution, + of the array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : `_Symbol` + _Symbol containing numbers whose standard deviation is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the standard deviations are computed. + The default is to compute the standard deviation of the flattened array. + If this is a tuple of ints, computation is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the standard deviation. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : _Symbol, optional + Dummy parameter to keep the consistency with the ndarray counterpart. 
+ keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of _Symbol, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + + Returns + ------- + m : _Symbol, see dtype parameter above + If out=None, returns a new array containing the standard deviation values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.std + `_ in + the following way(s): + + - only _Symbol is accepted as valid input, python iterables or scalar is not supported + - default output data type for integer input is float32 + + """ + return _npi.std(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) + + +@set_module('mxnet.symbol.numpy') +def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=too-many-arguments + """ + Compute the variance along the specified axis. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : `_Symbol` + _Symbol containing numbers whose variance is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the variance is computed. + The default is to compute the variance of the flattened array. + If this is a tuple of ints, computation is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. 
+ out : _Symbol, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of _Symbol, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + + Returns + ------- + m : _Symbol, see dtype parameter above + If out=None, returns a new array containing the variance values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.var + `_ in + the following way(s): + + - only _Symbol is accepted as valid input, python iterables or scalar is not supported + - default output data type for integer input is float32 + + """ + return _npi.var(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out) + + _set_np_symbol_class(_Symbol) diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h index 305f1c775cd5..4734d6cc666f 100644 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -27,6 +27,7 @@ #include #include +#include "../nn/moments-inl.h" #include "../tensor/broadcast_reduce_op.h" namespace mxnet { @@ -282,7 +283,6 @@ inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, IType, { Tensor igrad = outputs[0].FlatTo1D(s); - printf("output size: %lu input_size: %lu\n", outputs[0].Size(), inputs[0].Size()); igrad /= scalar(outputs[0].Size()/inputs[0].Size()); }); } @@ -306,6 +306,139 @@ void NumpyReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs, ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, 
req, outputs); } +struct NumpyMomentsParam : public dmlc::Parameter { + dmlc::optional> axis; + dmlc::optional dtype; + bool keepdims; + int ddof; + DMLC_DECLARE_PARAMETER(NumpyMomentsParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional>()) + .describe("Axis or axes along which a sum is performed. The default, axis=None, will sum " + "all of the elements of the input array. If axis is negative it counts from the " + "last to the first axis."); + DMLC_DECLARE_FIELD(dtype) + .add_enum("float16", mshadow::kFloat16) + .add_enum("float32", mshadow::kFloat32) + .add_enum("float64", mshadow::kFloat64) + .add_enum("int8", mshadow::kInt8) + .add_enum("int32", mshadow::kInt32) + .add_enum("int64", mshadow::kInt64) + .set_default(dmlc::optional()) + .describe("The type of the returned array and of the accumulator in which the elements are " + "summed. The dtype of a is used by default unless a has an integer dtype of less " + "precision than the default platform integer. In that case, if a is signed then " + "the platform integer is used while if a is unsigned then an unsigned integer of " + "the same precision as the platform integer is used."); + DMLC_DECLARE_FIELD(ddof).set_default(0) + .describe("Delta degrees of freedom. The divisor used in calculations is N - ddof, " + "where N represents the number of elements."); + DMLC_DECLARE_FIELD(keepdims).set_default(false) + .describe("If this is set to `True`, the reduced axes are left " + "in the result as dimension with size one."); + } +}; + +template +void ReduceAxesComputeWithWorkspaceImpl(const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs, + const mshadow::Tensor& workspace, + const mxnet::TShape& src_shape, + const mxnet::TShape& dst_shape, + const int ddof = 0) { + using namespace mshadow; + using namespace mshadow::expr; + + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, { + const TBlob in_data = inputs[0].reshape(src_shape); + const TBlob out_data = 
outputs[0].reshape(dst_shape); + BROADCAST_NDIM_SWITCH(dst_shape.ndim(), NDim, { + broadcast::Reduce( + s, out_data, req[0], workspace, in_data); + if (normalize) { + auto out = out_data.FlatTo2D(s); + out /= scalar(src_shape.Size()/dst_shape.Size() - ddof); + } + }); + }); + }); +} + +template +void NumpyMomentsForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + using namespace mshadow_op; + using namespace mxnet_op; + + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(req.size(), 2U); + CHECK_EQ(outputs.size(), 2U); + + const NumpyMomentsParam& param = nnvm::get(attrs.parsed); + + Stream *s = ctx.get_stream(); + + const TBlob& data = inputs[0]; + const TBlob& moment = outputs[0]; + const TBlob& mean = outputs[1]; + + mxnet::TShape small; + if (param.keepdims) { + small = moment.shape_; + } else { + small = NumpyReduceAxesShapeImpl(data.shape_, param.axis, true); + } + + mxnet::TShape src_shape, dst_shape; + BroadcastReduceShapeCompact(data.shape_, small, &src_shape, &dst_shape); + + MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, { + // Get workspace and temp space for data - mean + size_t workspace_size = 0; + BROADCAST_NDIM_SWITCH(dst_shape.ndim(), NDim, { + workspace_size = broadcast::ReduceWorkspaceSize( + s, dst_shape, req[0], src_shape); + }); + size_t temp_data_size = data.shape_.Size() * sizeof(DType); + size_t temp_mem_size = temp_data_size + workspace_size; + Tensor temp_mem = + ctx.requested[0].get_space_typed(Shape1(temp_mem_size), s); + DType *temp_data_ptr = reinterpret_cast(temp_mem.dptr_); + char *workspace_ptr = temp_mem.dptr_ + temp_data_size; + Tensor workspace(workspace_ptr, Shape1(workspace_size), s); + // Compute mean + ReduceAxesComputeWithWorkspaceImpl( + ctx, inputs, {kWriteTo}, {mean}, workspace, src_shape, dst_shape); + // Compute 
data - mean + Shape<6> data_shape, mean_shape; + for (int i = 0; i < 6; ++i) { + data_shape[i] = (i < data.shape_.ndim()) ? data.shape_[i] : 1; + mean_shape[i] = (i < small.ndim()) ? small[i] : 1; + } + Kernel::Launch(s, data_shape.Size(), temp_data_ptr, + data.dptr(), mean.dptr(), data_shape, mean_shape); + Tensor temp_data_tensor(temp_data_ptr, Shape1(data.shape_.Size()), s); + TBlob temp_data_blob = TBlob(temp_data_tensor).reshape(data.shape_); + ReduceAxesComputeWithWorkspaceImpl( + ctx, {temp_data_blob}, {req[0]}, {moment}, workspace, src_shape, dst_shape, param.ddof); + if (sqrt) { + Tensor moment_tensor = moment.FlatTo1D(s); + moment_tensor = F(moment_tensor); + } + }); + }); +} + template void NumpyBroadcastToForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx, diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc index bccd3af8b2cf..774bc11f5de8 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cc +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc @@ -30,6 +30,7 @@ namespace op { DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam); DMLC_REGISTER_PARAMETER(NumpyReduceAxesNoDTypeParam); +DMLC_REGISTER_PARAMETER(NumpyMomentsParam); inline bool NumpySumType(const nnvm::NodeAttrs& attrs, std::vector *in_attrs, @@ -166,6 +167,147 @@ NNVM_REGISTER_OP(_backward_np_prod) .set_attr("TIsBackward", true) .set_attr("FCompute", NumpyReduceAxesBackwardUseInOut); +inline bool IsIntType(const int dtype) { + return (dtype == mshadow::kUint8 || + dtype == mshadow::kInt32 || + dtype == mshadow::kInt8 || + dtype == mshadow::kInt64); +} + +inline bool NumpyMeanType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const NumpyReduceAxesParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + if (IsIntType(in_attrs->at(0)) && !IsIntType(param.dtype.value())) { + LOG(FATAL) << 
"Output cannot be float type when input is integer type for now"; + } + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_npi_mean) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxesShape) +.set_attr("FInferType", NumpyMeanType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyReduceAxesParam::__FIELDS__()) +.set_attr("FCompute", NumpyReduceAxesCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_mean"}); + +NNVM_REGISTER_OP(_backward_np_mean) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs(1) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +inline bool NumpyMomentsShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 2U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const NumpyMomentsParam& param = nnvm::get(attrs.parsed); + mxnet::TShape out_shape = NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape); + SHAPE_ASSIGN_CHECK(*out_attrs, 1, out_shape); + + return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1)); +} + +inline bool NumpyMomentsType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 2U); + const NumpyMomentsParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + 
TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + TYPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(0)); + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_npi_std) +.set_num_inputs(1) +.set_num_outputs(2) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyMomentsShape) +.set_attr("FInferType", NumpyMomentsType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"std", "mean"}; + }) +.set_attr("FNumVisibleOutputs", + [](const NodeAttrs& attrs) { + return 1; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyMomentsParam::__FIELDS__()) +.set_attr("FCompute", NumpyMomentsForward) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", MakeZeroGradNodes); + +NNVM_REGISTER_OP(_npi_var) +.set_num_inputs(1) +.set_num_outputs(2) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyMomentsShape) +.set_attr("FInferType", NumpyMomentsType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"var", "mean"}; + }) +.set_attr("FNumVisibleOutputs", + [](const NodeAttrs& attrs) { + return 1; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyMomentsParam::__FIELDS__()) +.set_attr("FCompute", NumpyMomentsForward) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", MakeZeroGradNodes); + bool NumpyBroadcastToShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_attrs, mxnet::ShapeVector *out_attrs) { diff --git 
a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu index d1d33cc6d8b0..53e78787d47d 100644 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cu +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cu @@ -50,6 +50,18 @@ NNVM_REGISTER_OP(_np_prod) NNVM_REGISTER_OP(_backward_np_prod) .set_attr("FCompute", NumpyReduceAxesBackwardUseInOut); +NNVM_REGISTER_OP(_npi_mean) +.set_attr("FCompute", NumpyReduceAxesCompute); + +NNVM_REGISTER_OP(_backward_np_mean) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +NNVM_REGISTER_OP(_npi_std) +.set_attr("FCompute", NumpyMomentsForward); + +NNVM_REGISTER_OP(_npi_var) +.set_attr("FCompute", NumpyMomentsForward); + NNVM_REGISTER_OP(_np_broadcast_to) .set_attr("FCompute", NumpyBroadcastToForward); diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 49b5b645d604..c137b30f89c3 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -66,7 +66,7 @@ def tensordot_backward(a, b, axes=2): b_axes_summed[i] = (b_axes_summed[i] + b.ndim) % b.ndim if len(a_axes_summed) != len(b_axes_summed): - raise ValueError('Axes length mismatch') + raise ValueError('Axes length mismatch') a_axes_remained = [] for i in range(a.ndim): @@ -179,7 +179,7 @@ def test_np_dot(): ((3, 4, 5), (5, )), # Case 4 ((3, 4, 5), (5, 2)), # Case 5 ((5,), (5, 2)), - ((3, 5, 4), (5, 4, 3)), + ((3, 5, 4), (5, 4, 3)), ((3, 4), (5, 4, 3)), ((4,), (5, 4, 3)) ] @@ -390,6 +390,134 @@ def _test_np_exception(func, shape, dim): _test_np_exception(func, shape, dim) +@with_seed() +@use_np +def test_np_mean(): + class TestMean(HybridBlock): + def __init__(self, axis=None, dtype=None, keepdims=False): + super(TestMean, self).__init__() + self._axis = axis + self._dtype = dtype + self._keepdims = keepdims + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.mean(a, axis=self._axis, dtype=self._dtype, 
keepdims=self._keepdims) + + def is_int(dtype): + return 'int' in dtype + + in_data_dim = random.choice([2, 3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64', + 'int8': 'int32', 'int32': 'int64', 'int64': 'int64'} + for hybridize in [False, True]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float16', 'float32', 'float64']: + for dtype in ['float16', 'float32', 'float64']: + if is_int(dtype) and not is_int(itype): + continue + # test gluon + test_mean = TestMean(axis=axis, dtype=dtype, keepdims=keepdims) + if hybridize: + test_mean.hybridize() + if is_int(itype): + x = _np.random.randint(-128, 128, shape, dtype=itype) + x = mx.nd.array(x, dtype=itype) + else: + x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype) + x = x.as_np_ndarray() + x.attach_grad() + + expected_ret = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims) + expected_ret = expected_ret.astype(dtype) + with mx.autograd.record(): + y = test_mean(x) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if dtype == 'float16' else 1e-3, + atol=1e-5 if dtype == 'float16' else 1e-5) + + y.backward() + N = x.size / y.size + assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype) / N) + + # test numeric + if itype == 'float32' and dtype == 'float32': + x_sym = mx.sym.Variable("x").as_np_ndarray() + mx_sym = mx.sym.np.mean(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray() + check_numeric_gradient(mx_sym, [x.as_nd_ndarray()], + numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32) + + # test imperative + mx_out = np.mean(x, axis=axis, dtype=dtype, keepdims=keepdims) + np_out = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() 
+@use_np +def test_np_moment(): + class TestMoment(HybridBlock): + def __init__(self, name, axis=None, dtype=None, keepdims=False, ddof=0): + super(TestMoment, self).__init__() + self._name = name + self._axis = axis + self._dtype = dtype + self._keepdims = keepdims + self._ddof = ddof + + def hybrid_forward(self, F, a, *args, **kwargs): + return getattr(F.np, self._name)(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims, ddof=self._ddof) + + def is_int(dtype): + return 'int' in dtype + + def legalize_shape(shape): + shape_ = list(shape) + for i in range(len(shape_)): + shape_[i] += 1 + return tuple(shape_) + + in_data_dim = random.choice([2, 3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + shape = legalize_shape(shape) + acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64', + 'int8': 'float64', 'int32': 'float64', 'int64': 'float64'} + + for name in ['var', 'std']: + for hybridize in [False, True]: + for ddof in [0, 1]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']: + for dtype in ['float16', 'float32', 'float64']: + if is_int(dtype) and not is_int(itype) or is_int(itype) and is_int(dtype): + continue + atol = 3e-4 if itype == 'float16' or dtype == 'float16' else 1e-5 + rtol = 1e-2 if itype == 'float16' or dtype == 'float16' else 1e-3 + # test gluon + test_moment = TestMoment(name, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof) + if hybridize: + test_moment.hybridize() + if is_int(itype): + x = _np.random.randint(-16, 16, shape, dtype=itype) + x = mx.nd.array(x) + else: + x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype) + x = x.as_np_ndarray() + x.attach_grad() + expected_ret = getattr(_np, name)(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims, ddof=ddof) + expected_ret = expected_ret.astype(dtype) + y = test_moment(x) + assert y.shape == expected_ret.shape + 
assert_almost_equal(y.asnumpy(), expected_ret, rtol=rtol, atol=atol, use_broadcast=False, equal_nan=True) + + # test imperative + mx_out = getattr(np, name)(x, axis=axis, dtype=dtype, keepdims=keepdims, ddof=ddof) + np_out = getattr(_np, name)(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims, ddof=ddof).astype(dtype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=rtol, atol=atol, use_broadcast=False, equal_nan=True) + + @with_seed() @use_np def test_np_linspace():