From 9133c73c5c1d0b807daecc284d9a8bb05814721b Mon Sep 17 00:00:00 2001 From: Doron Behar Date: Mon, 21 Oct 2024 11:49:23 +0300 Subject: [PATCH] unumpy.average: init Ref: https://github.com/lmfit/uncertainties/issues/38 --- CHANGES.rst | 1 + doc/numpy_guide.rst | 23 +++++++++++ tests/test_unumpy.py | 67 ++++++++++++++++++++++++++++++++ uncertainties/unumpy/core.py | 74 ++++++++++++++++++++++++++++++++++++ 4 files changed, 165 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index c45ef266..9058854f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,7 @@ Unreleased Fixes: - fix `readthedocs` configuration so that the build passes (#254) +- Add ``unumpy.average`` to calculate uncertainties aware average (#38) 3.2.2 2024-July-08 ----------------------- diff --git a/doc/numpy_guide.rst b/doc/numpy_guide.rst index f350765f..bac88f52 100644 --- a/doc/numpy_guide.rst +++ b/doc/numpy_guide.rst @@ -144,6 +144,29 @@ functions is available in the documentation for :mod:`uncertainties.umath`. .. index:: pair: testing and operations (in arrays); NaN +Uncertainties aware average +--------------------------- + +If you have measured a certain value multiple times, with a different +uncertainty every measurement. Averaging over the results in a manner aware of +the different uncertainties, is not trivial. The function ``unumpy.average()`` +does that: + +>>> measurements = numpy.array([2.1, 2.0, 2.05, 2.08, 2.02]) +>>> stds = numpy.array([0.05, 0.03, 0.04, 0.06, 0.05]) +>>> arr = unumpy.uarray(measurements, stds) +>>> unumpy.average(arr) +2.03606+/-0.019 + +Note how that function gives a value different from numpy's ``mean`` function: + +>>> numpy.mean(arr) +2.050+/-0.019 + +If you have an array with correlated values, the covariances will be considered +as well. You can also specify an ``axes`` argument, to specify a certain axis +or a tuple of axes upon which to average the result. + NaN testing and NaN-aware operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/test_unumpy.py b/tests/test_unumpy.py index 783fd336..2860ee8b 100644 --- a/tests/test_unumpy.py +++ b/tests/test_unumpy.py @@ -1,3 +1,5 @@ +import pytest + try: import numpy except ImportError: @@ -300,3 +302,68 @@ def test_array_comparisons(): # For matrices, 1D arrays are converted to 2D arrays: mat = unumpy.umatrix([1, 2], [1, 4]) assert numpy.all((mat == [mat[0, 0], 4]) == [True, False]) + + +class TestAverage: + arr1d = unumpy.uarray( + [2.1, 2.0, 2.05, 2.08, 2.02], + [0.05, 0.03, 0.04, 0.06, 0.05], + ) + def __init__(self): + sigma2d = 0.3 + means2d = np.linspace(4, 20, num=50).reshape(5, 10) + self.arr2d = unumpy.uarray( + np.random.normal(loc=means, scale=sigma2d), + np.random.uniform(low=0, high=sigma2d) + ) + meansNd = np.random.rand(4,7,5,2,10,14) + 10 + self.arrNd = unumpy.uarray( + meansNd, + np.random.uniform(low=0, high=0.2, size=meansNd.shape) + ) + def test_average_type_check(): + with pytest.raises(ValueError): + unumpy.average(numpy.array(["bla"])) + def test_average_example(): + """Tests the example from the docs.""" + avg = unumpy.average(self.arr1d) + assert np.isclose(avg.n, 2.0360612043435338) + assert np.isclose(avg.s, 0.018851526708200846) + @pytest.mark.parametrize("invalid_axis", [1,2,3]) + def test_average1d_invalid_axes(invalid_axis): + with pytest.raises(ValueError): + unumpy.average(self.arr1d, axes=invalid_axis) + @pytest.mark.parametrize("invalid_axis", [2,3]) + def test_average2d_invalid_axes(invalid_axis): + with pytest.raises(ValueError): + unumpy.average(self.arr1d, axes=invalid_axis) + @pytest.mark.parametrize( + "expected_shape, axis_argument", [ + ((), None), + # According to the linspace reshape in __init__ + ((5,), 1), + ((5,), (1,)), + ((10,), 0), + ((10,), (0,)), + ] + ) + def test_average2d_shape(expected_shape, axis_argument): + assert unumpy.average( + self.arr2d, + axes=axis_argument + ).shape == expected_shape + @pytest.mark.parametrize( + "expected_shape, axis_argument", [ + ((), None), + # According to random.rand() argument in __init__ + ((4,5,10), (1,3,5)), + ((10,), (0,1,2,3,4,6)), + ((14,), (0,1,2,3,4,5)), + ((7,2), (0,2,4,5,6)), + ] + ) + def test_averageNd_shape(expected_shape, axis_argument): + assert unumpy.average( + self.arrNd, + axes=axis_argument + ).shape == expected_shape diff --git a/uncertainties/unumpy/core.py b/uncertainties/unumpy/core.py index fc9e8ac4..3e8bd728 100644 --- a/uncertainties/unumpy/core.py +++ b/uncertainties/unumpy/core.py @@ -30,10 +30,84 @@ # Utilities: "nominal_values", "std_devs", + "average", # Classes: "matrix", ] +def _flatten_array_by_axes(arr, axes): + """ + Return arr, reshaped such that the axes listed in parameter ``axes`` are + flattened, and become the last axis of the resulting array. A utility used + for `:func:uncertainties.unumpy.average`. + """ + arr = np.asanyarray(arr) + if axes is None: + return arr.flatten() + # The following sanity checks on axes can be replaced by + # np.lib.array_utils.normalize_axis_tuple once we will require a minimum + # version of numpy 2.x. + if type(axes) not in (tuple, list): + try: + axes = [operator.index(axes)] + except TypeError: + pass + axes = tuple(axes) + if len(set(axes)) != len(axes): + raise ValueError('repeated axis found in axes argument') + # This one is not checked by np.lib.array_utils.normalize_axis_tuple + if max(axes) >= arr.ndim: + raise ValueError( + f'Cannot average over an inexistent axis {max(axes)} >= arr.ndim = ' + f'{arr.ndim}' + ) + new_shape = [] + # To hold the product of the dimensions to flatten + flatten_size = 1 + for i in range(len(arr.shape)): + if i in axes: + flatten_size *= arr.shape[i] # Multiply dimensions to flatten + else: + new_shape.append(arr.shape[i]) # Keep the dimension + # This way the shapes to average over are flattend, in the end. + new_shape.append(flatten_size) + return arr.reshape(*new_shape) + +def average(arr, axes=None): + """ + Return a weighted averaged along with a weighted mean over a certain axis + or a axes. The formulas implemented by this are: + + $$ \mu = \frac{\sum_i (x_i/\sigma_i^2)}{\sum_i \sigma_i^{-2}}$$ + + $$\sigma_\mu = \frac{\sqrt{\sum_{i,j} \sigma_i^{-2} \sigma_j^{-2} \cdot Cov(x_i, x_j)}}{\sum_i \sigma_i^{-2}}$$ + + Where of course $Cov(x_i, x_i) = \sigma_i^2$. + + By default, operates on all axes of the given array. + """ + arr = _flatten_array_by_axes(arr, axes) + if not isinstance(arr.flat[0], core.Variable): + raise ValueError( + "unumpy.average is meant to operate upon numpy arrays of ufloats, " + "not pure numpy arrays" + ) + cov_matrix = covariance_matrix(arr) + weights = numpy.diagonal(cov_matrix, axis1=-2, axis2=-1)**-1 + weights_sum = weights.sum(axis=-1) + return uarray( + (nominal_values(arr)*weights).sum(axis=-1) / weights_sum, + numpy.sqrt( + numpy.einsum( + '...i,...ij,...j', + weights, + cov_matrix, + weights, + ) + ) / weights_sum + ) + + ############################################################################### # Utilities: