From 9133c73c5c1d0b807daecc284d9a8bb05814721b Mon Sep 17 00:00:00 2001
From: Doron Behar <doron.behar@gmail.com>
Date: Mon, 21 Oct 2024 11:49:23 +0300
Subject: [PATCH] unumpy.average: init

Ref: https://github.com/lmfit/uncertainties/issues/38
---
 CHANGES.rst                  |  1 +
 doc/numpy_guide.rst          | 23 +++++++++++
 tests/test_unumpy.py         | 67 ++++++++++++++++++++++++++++++++
 uncertainties/unumpy/core.py | 74 ++++++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index c45ef266..9058854f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -7,6 +7,7 @@ Unreleased
 Fixes:
 
 - fix `readthedocs` configuration so that the build passes (#254)
+- Add ``unumpy.average`` to calculate uncertainties aware average (#38)
 
 3.2.2   2024-July-08
 -----------------------
diff --git a/doc/numpy_guide.rst b/doc/numpy_guide.rst
index f350765f..bac88f52 100644
--- a/doc/numpy_guide.rst
+++ b/doc/numpy_guide.rst
@@ -144,6 +144,29 @@ functions is available in the documentation for  :mod:`uncertainties.umath`.
 .. index::
    pair: testing and operations (in arrays); NaN
 
+Uncertainties aware average
+---------------------------
+
+If you have measured a certain value multiple times, with a different
+uncertainty every measurement. Averaging over the results in a manner aware of
+the different uncertainties, is not trivial. The function ``unumpy.average()``
+does that:
+
+>>> measurements = numpy.array([2.1, 2.0, 2.05, 2.08, 2.02])
+>>> stds = numpy.array([0.05, 0.03, 0.04, 0.06, 0.05])
+>>> arr = unumpy.uarray(measurements, stds)
+>>> unumpy.average(arr)
+2.03606+/-0.019
+
+Note how that function gives a value different from numpy's ``mean`` function:
+
+>>> numpy.mean(arr)
+2.050+/-0.019
+
+If you have an array with correlated values, the covariances will be considered
+as well. You can also specify an ``axes`` argument, to specify a certain axis
+or a tuple of axes upon which to average the result.
+
 NaN testing and NaN-aware operations
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/tests/test_unumpy.py b/tests/test_unumpy.py
index 783fd336..2860ee8b 100644
--- a/tests/test_unumpy.py
+++ b/tests/test_unumpy.py
@@ -1,3 +1,5 @@
+import pytest
+
 try:
     import numpy
 except ImportError:
@@ -300,3 +302,68 @@ def test_array_comparisons():
     # For matrices, 1D arrays are converted to 2D arrays:
     mat = unumpy.umatrix([1, 2], [1, 4])
     assert numpy.all((mat == [mat[0, 0], 4]) == [True, False])
+
+
+class TestAverage:
+    arr1d = unumpy.uarray(
+        [2.1, 2.0, 2.05, 2.08, 2.02],
+        [0.05, 0.03, 0.04, 0.06, 0.05],
+    )
+    def __init__(self):
+        sigma2d = 0.3
+        means2d = np.linspace(4, 20, num=50).reshape(5, 10)
+        self.arr2d = unumpy.uarray(
+            np.random.normal(loc=means, scale=sigma2d),
+            np.random.uniform(low=0, high=sigma2d)
+        )
+        meansNd = np.random.rand(4,7,5,2,10,14) + 10
+        self.arrNd = unumpy.uarray(
+            meansNd,
+            np.random.uniform(low=0, high=0.2, size=meansNd.shape)
+        )
+    def test_average_type_check():
+        with pytest.raises(ValueError):
+            unumpy.average(numpy.array(["bla"]))
+    def test_average_example():
+        """Tests the example from the docs."""
+        avg = unumpy.average(self.arr1d)
+        assert np.isclose(avg.n, 2.0360612043435338)
+        assert np.isclose(avg.s, 0.018851526708200846)
+    @pytest.mark.parametrize("invalid_axis", [1,2,3])
+    def test_average1d_invalid_axes(invalid_axis):
+        with pytest.raises(ValueError):
+            unumpy.average(self.arr1d, axes=invalid_axis)
+    @pytest.mark.parametrize("invalid_axis", [2,3])
+    def test_average2d_invalid_axes(invalid_axis):
+        with pytest.raises(ValueError):
+            unumpy.average(self.arr1d, axes=invalid_axis)
+    @pytest.mark.parametrize(
+        "expected_shape, axis_argument", [
+            ((), None),
+            # According to the linspace reshape in __init__
+            ((5,), 1),
+            ((5,), (1,)),
+            ((10,), 0),
+            ((10,), (0,)),
+        ]
+    )
+    def test_average2d_shape(expected_shape, axis_argument):
+        assert unumpy.average(
+            self.arr2d,
+            axes=axis_argument
+        ).shape == expected_shape
+    @pytest.mark.parametrize(
+        "expected_shape, axis_argument", [
+            ((), None),
+            # According to random.rand() argument in __init__
+            ((4,5,10), (1,3,5)),
+            ((10,), (0,1,2,3,4,6)),
+            ((14,), (0,1,2,3,4,5)),
+            ((7,2), (0,2,4,5,6)),
+        ]
+    )
+    def test_averageNd_shape(expected_shape, axis_argument):
+        assert unumpy.average(
+            self.arrNd,
+            axes=axis_argument
+        ).shape == expected_shape
diff --git a/uncertainties/unumpy/core.py b/uncertainties/unumpy/core.py
index fc9e8ac4..3e8bd728 100644
--- a/uncertainties/unumpy/core.py
+++ b/uncertainties/unumpy/core.py
@@ -30,10 +30,84 @@
     # Utilities:
     "nominal_values",
     "std_devs",
+    "average",
     # Classes:
     "matrix",
 ]
 
+def _flatten_array_by_axes(arr, axes):
+    """
+    Return arr, reshaped such that the axes listed in parameter ``axes`` are
+    flattened, and become the last axis of the resulting array. A utility used
+    for `:func:uncertainties.unumpy.average`.
+    """
+    arr = np.asanyarray(arr)
+    if axes is None:
+        return arr.flatten()
+    # The following sanity checks on axes can be replaced by
+    # np.lib.array_utils.normalize_axis_tuple once we will require a minimum
+    # version of numpy 2.x.
+    if type(axes) not in (tuple, list):
+        try:
+            axes = [operator.index(axes)]
+        except TypeError:
+            pass
+    axes = tuple(axes)
+    if len(set(axes)) != len(axes):
+        raise ValueError('repeated axis found in axes argument')
+    # This one is not checked by np.lib.array_utils.normalize_axis_tuple
+    if max(axes) >= arr.ndim:
+        raise ValueError(
+            f'Cannot average over an inexistent axis {max(axes)} >= arr.ndim = '
+            f'{arr.ndim}'
+        )
+    new_shape = []
+    # To hold the product of the dimensions to flatten
+    flatten_size = 1
+    for i in range(len(arr.shape)):
+        if i in axes:
+            flatten_size *= arr.shape[i]  # Multiply dimensions to flatten
+        else:
+            new_shape.append(arr.shape[i])  # Keep the dimension
+    # This way the shapes to average over are flattend, in the end.
+    new_shape.append(flatten_size)
+    return arr.reshape(*new_shape)
+
+def average(arr, axes=None):
+    """
+    Return a weighted averaged along with a weighted mean over a certain axis
+    or a axes. The formulas implemented by this are:
+
+    $$ \mu = \frac{\sum_i (x_i/\sigma_i^2)}{\sum_i \sigma_i^{-2}}$$
+
+    $$\sigma_\mu = \frac{\sqrt{\sum_{i,j} \sigma_i^{-2} \sigma_j^{-2} \cdot Cov(x_i, x_j)}}{\sum_i \sigma_i^{-2}}$$
+
+    Where of course $Cov(x_i, x_i) = \sigma_i^2$.
+
+    By default, operates on all axes of the given array.
+    """
+    arr = _flatten_array_by_axes(arr, axes)
+    if not isinstance(arr.flat[0], core.Variable):
+        raise ValueError(
+            "unumpy.average is meant to operate upon numpy arrays of ufloats, "
+            "not pure numpy arrays"
+        )
+    cov_matrix = covariance_matrix(arr)
+    weights = numpy.diagonal(cov_matrix, axis1=-2, axis2=-1)**-1
+    weights_sum = weights.sum(axis=-1)
+    return uarray(
+        (nominal_values(arr)*weights).sum(axis=-1) / weights_sum,
+        numpy.sqrt(
+            numpy.einsum(
+                '...i,...ij,...j',
+                weights,
+                cov_matrix,
+                weights,
+            )
+        ) / weights_sum
+    )
+
+
 ###############################################################################
 # Utilities: