From 49f08884386a3613016b5afb6c2c281c98e4163c Mon Sep 17 00:00:00 2001 From: Hao Jin Date: Wed, 25 Sep 2019 05:57:00 +0000 Subject: [PATCH] numpy-compatible histogram --- python/mxnet/ndarray/numpy/_op.py | 49 +++++++++++++++++++++++++- python/mxnet/numpy/multiarray.py | 34 +++++++++++++++++- python/mxnet/symbol/numpy/_symbol.py | 49 +++++++++++++++++++++++++- src/operator/tensor/histogram.cc | 1 + tests/python/unittest/test_numpy_op.py | 17 +++++++++ 5 files changed, 147 insertions(+), 3 deletions(-) diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index e8332f1a83ef..d5bc6d9423f2 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -31,7 +31,7 @@ 'arctan2', 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', - 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', + 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', 'histogram', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', 'vstack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var', 'indices', 'copysign', 'ravel', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'hypot', 'rad2deg', 'deg2rad', @@ -740,6 +740,53 @@ def tensordot(a, b, axes=2): return _npi.tensordot(a, b, a_axes_summed, b_axes_summed) +@set_module('mxnet.ndarray.numpy') +def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): # pylint: disable=too-many-arguments + """ + Compute the histogram of a set of data. + + Parameters + ---------- + a : NDArray + Input data. The histogram is computed over the flattened array. + bins : int or NDArray + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). 
If `bins` is a + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. + .. versionadded:: 1.11.0 + If `bins` is a string, it defines the method used to calculate the + optimal bin width, as defined by `histogram_bin_edges`. + range : (float, float) + The lower and upper range of the bins. Required when `bins` is an integer. + Values outside the range are ignored. The first element of the range must + be less than or equal to the second. + normed : bool, optional + Not supported yet, coming soon. + weights : array_like, optional + Not supported yet, coming soon. + density : bool, optional + Not supported yet, coming soon. + """ + if normed is True: + raise NotImplementedError("normed is not supported yet...") + if weights is not None: + raise NotImplementedError("weights is not supported yet...") + if density is True: + raise NotImplementedError("density is not supported yet...") + if isinstance(bins, numeric_types): + if range is None: + raise NotImplementedError("automatic range is not supported yet...") + return _npi.histogram(a, bin_cnt=bins, range=range) + if isinstance(bins, (list, tuple)): + raise NotImplementedError("array_like bins is not supported yet...") + if isinstance(bins, str): + raise NotImplementedError("string bins is not supported yet...") + if isinstance(bins, NDArray): + return _npi.histogram(a, bins=bins) + raise ValueError("np.histogram fails with", locals()) + + @set_module('mxnet.ndarray.numpy') def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None): # pylint: disable=too-many-arguments r""" diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 7ba0f0d7d813..dc89f2d59e38 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -51,11 +51,12 @@ 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 
'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', - 'tensordot', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', + 'tensordot', 'histogram', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', 'vstack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var', 'indices', 'copysign', 'ravel', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'arctan2', 'hypot', 'rad2deg', 'deg2rad', 'unique', 'lcm', 'tril', 'identity', 'take'] + # Return code for dispatching indexing function call _NDARRAY_UNSUPPORTED_INDEXING = -1 _NDARRAY_BASIC_INDEXING = 0 @@ -3596,6 +3597,37 @@ def tensordot(a, b, axes=2): return _mx_nd_np.tensordot(a, b, axes) +@set_module('mxnet.numpy') +def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): # pylint: disable=too-many-arguments + """ + Compute the histogram of a set of data. + + Parameters + ---------- + a : NDArray + Input data. The histogram is computed over the flattened array. + bins : int or NDArray + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). If `bins` is a + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. + .. versionadded:: 1.11.0 + If `bins` is a string, it defines the method used to calculate the + optimal bin width, as defined by `histogram_bin_edges`. + range : (float, float) + The lower and upper range of the bins. Required when `bins` is an integer. + Values outside the range are ignored. The first element of the range must + be less than or equal to the second. + normed : bool, optional + Not supported yet, coming soon. + weights : array_like, optional + Not supported yet, coming soon. + density : bool, optional + Not supported yet, coming soon.
+ """ + return _mx_nd_np.histogram(a, bins=bins, range=range, normed=normed, weights=weights, density=density) + + @set_module('mxnet.numpy') def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None): # pylint: disable=too-many-arguments r""" diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py index 9c055c401b31..c8b1823dc5bf 100644 --- a/python/mxnet/symbol/numpy/_symbol.py +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -33,7 +33,7 @@ 'sin', 'cos', 'tan', 'sinh', 'cosh', 'tanh', 'log10', 'sqrt', 'cbrt', 'abs', 'absolute', 'exp', 'expm1', 'arcsin', 'arccos', 'arctan', 'sign', 'log', 'degrees', 'log2', 'log1p', 'rint', 'radians', 'reciprocal', 'square', 'negative', 'fix', 'ceil', 'floor', - 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', + 'trunc', 'logical_not', 'arcsinh', 'arccosh', 'arctanh', 'tensordot', 'histogram', 'linspace', 'expand_dims', 'tile', 'arange', 'split', 'concatenate', 'stack', 'vstack', 'mean', 'maximum', 'minimum', 'swapaxes', 'clip', 'argmax', 'std', 'var', 'indices', 'copysign', 'ravel', 'hanning', 'hamming', 'blackman', 'flip', 'around', 'hypot', 'rad2deg', 'deg2rad', @@ -1265,6 +1265,53 @@ def tensordot(a, b, axes=2): return _npi.tensordot(a, b, a_axes_summed, b_axes_summed) +@set_module('mxnet.symbol.numpy') +def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): # pylint: disable= too-many-arguments + """ + Compute the histogram of a set of data. + + Parameters + ---------- + a : Symbol + Input data. The histogram is computed over the flattened array. + bins : int or Symbol + If `bins` is an int, it defines the number of equal-width + bins in the given range (10, by default). If `bins` is a + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. + .. 
versionadded:: 1.11.0 + If `bins` is a string, it defines the method used to calculate the + optimal bin width, as defined by `histogram_bin_edges`. + range : (float, float) + The lower and upper range of the bins. Required when `bins` is an integer. + Values outside the range are ignored. The first element of the range must + be less than or equal to the second. + normed : bool, optional + Not supported yet, coming soon. + weights : array_like, optional + Not supported yet, coming soon. + density : bool, optional + Not supported yet, coming soon. + """ + if normed is True: + raise NotImplementedError("normed is not supported yet...") + if weights is not None: + raise NotImplementedError("weights is not supported yet...") + if density is True: + raise NotImplementedError("density is not supported yet...") + if isinstance(bins, numeric_types): + if range is None: + raise NotImplementedError("automatic range is not available yet...") + return _npi.histogram(a, bin_cnt=bins, range=range) + if isinstance(bins, (list, tuple)): + raise NotImplementedError("array_like bins is not supported yet...") + if isinstance(bins, str): + raise NotImplementedError("string bins is not supported yet...") + if isinstance(bins, Symbol): + return _npi.histogram(a, bins) + raise ValueError("histogram fails with", locals()) + + @set_module('mxnet.symbol.numpy') def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None): # pylint: disable=too-many-arguments r""" diff --git a/src/operator/tensor/histogram.cc b/src/operator/tensor/histogram.cc index 754475bff9ad..b7896e9e0016 100644 --- a/src/operator/tensor/histogram.cc +++ b/src/operator/tensor/histogram.cc @@ -123,6 +123,7 @@ void HistogramForwardImpl(const OpContext& ctx, DMLC_REGISTER_PARAMETER(HistogramParam); NNVM_REGISTER_OP(_histogram) +.add_alias("_npi_histogram") .describe(R"code(This operators implements the histogram function.
Example:: diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index eaf3032d526d..2f6c94a15caf 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -1738,6 +1738,23 @@ def hybrid_forward(self, F, a): assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) +@with_seed() +@use_np +def test_np_histogram(): + shapes = [(), (3, 4), (3, 0)] + + for shape in shapes: + mx_a = np.random.uniform(0.0, 10.0, size=shape) + np_a = mx_a.asnumpy() + mx_bins = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5., 6., 7., 8., 9., 10.]) + np_bins = mx_bins.asnumpy() + for bins, _range in [(20, (0.0, 10.0)), (mx_bins, None)]: + mx_cnts, mx_bins = np.histogram(mx_a, bins=bins, range=_range) + np_cnts, np_bins = _np.histogram(np_a, bins=bins if isinstance(bins, mx.base.numeric_types) else bins.asnumpy(), range=_range) + assert_almost_equal(mx_cnts.asnumpy(), np_cnts, rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_bins.asnumpy(), np_bins, rtol=1e-3, atol=1e-5) + + @with_seed() @use_np def test_np_choice():