diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 518fb1b5..89eeae62 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -14,20 +14,20 @@ import builtin.bool as builtin_bool from builtin.type_aliases import Origin from collections import Dict from collections.optional import Optional -from math import log10 from memory import UnsafePointer, memset_zero, memcpy -from python import Python, PythonObject +from math import log10 +from python import PythonObject from sys import simdwidthof from tensor import Tensor from utils import Variant -from utils.numerics import isnan, isinf import numojo.core._array_funcs as _af +from numojo.core._math_funcs import Vectorized +from numojo.core.datatypes import TypeCoercion, _concise_dtype_str +from numojo.core.item import Item from numojo.core.ndshape import NDArrayShape from numojo.core.ndstrides import NDArrayStrides -from numojo.core.item import Item from numojo.core.own_data import OwnData -from numojo.core._math_funcs import Vectorized from numojo.core.utility import ( _get_offset, _update_flags, @@ -36,32 +36,24 @@ from numojo.core.utility import ( to_numpy, to_tensor, bool_to_numeric, - is_floattype, ) - +import numojo.routines.bitwise as bitwise +import numojo.routines.creation as creation from numojo.routines.io.formatting import ( - format_floating_precision, - format_floating_scientific, format_value, PrintOptions, - printoptions, GLOBAL_PRINT_OPTIONS, ) -import numojo.routines.creation as creation -import numojo.routines.creation as creation -import numojo.routines.sorting as sorting -import numojo.routines.math.arithmetic as arithmetic -import numojo.routines.logic.comparison as comparison -import numojo.routines.math.rounding as rounding -import numojo.routines.bitwise as bitwise import numojo.routines.linalg as linalg -from numojo.core.datatypes import TypeCoercion, _concise_dtype_str -from numojo.routines.statistics.averages import mean -from numojo.routines.math.products import prod, cumprod -from numojo.routines.math.sums import sum, cumsum -from numojo.routines.logic.truth import any from numojo.routines.linalg.products import matmul +import numojo.routines.logic.comparison as comparison from numojo.routines.manipulation import reshape, ravel +import numojo.routines.math.arithmetic as arithmetic +from numojo.routines.math.products import prod, cumprod +import numojo.routines.math.rounding as rounding +from numojo.routines.math.sums import sum, cumsum +import numojo.routines.sorting as sorting +from numojo.routines.statistics.averages import mean # ===----------------------------------------------------------------------===# # NDArray @@ -3426,19 +3418,33 @@ struct NDArray[dtype: DType = DType.float64]( ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the standard deviation. + See `numojo.std`. + + Parameters: + returned_dtype: The returned data type, defaulting to float64. Args: ddof: Delta degree of freedom. """ - if ddof >= self.size: - raise Error( - String("ddof {} should be smaller than size {}").format( - ddof, self.size - ) - ) + return std[returned_dtype](self, ddof=ddof) - return variance[returned_dtype](self, ddof=ddof) ** 0.5 + fn std[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> NDArray[returned_dtype]: + """ + Compute the standard deviation along the axis. + See `numojo.std`. + + Parameters: + returned_dtype: The returned data type, defaulting to float64. + + Args: + axis: The axis along which the mean is performed. + ddof: Delta degree of freedom. + """ + + return std[returned_dtype](self, axis=axis, ddof=ddof) fn sum(self: Self) raises -> Scalar[dtype]: """ @@ -3551,6 +3557,22 @@ struct NDArray[dtype: DType = DType.float64]( """ return variance[returned_dtype](self, ddof=ddof) + fn variance[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> NDArray[returned_dtype]: + """ + Returns the variance of array along the axis. + See `numojo.variance`. + + Parameters: + returned_dtype: The returned data type, defaulting to float64. + + Args: + axis: The axis along which the mean is performed. + ddof: Delta degree of freedom. + """ + return variance[returned_dtype](self, axis=axis, ddof=ddof) + # ===----------------------------------------------------------------------===# # NDArrayIterator diff --git a/numojo/routines/manipulation.mojo b/numojo/routines/manipulation.mojo index 6b07974b..31f21418 100644 --- a/numojo/routines/manipulation.mojo +++ b/numojo/routines/manipulation.mojo @@ -302,7 +302,7 @@ fn broadcast_to[ # We compare the shape from the trailing dimensions. var b_strides = NDArrayStrides( - shape + ndim=len(shape), initialized=False ) # Strides of b when refer to data of a for i in range(a.shape.ndim): @@ -418,6 +418,49 @@ fn broadcast_to[ return B^ +fn _broadcast_back_to[ + dtype: DType +](a: NDArray[dtype], shape: NDArrayShape, axis: Int) raises -> NDArray[dtype]: + """ + Broadcasts the array back to the given shape. + If array `b` is the result of array `a` operated along an axis, + it has one dimension less than `a`. + This function can broadcast `b` back to the shape of `a`. + It is a temporary function and should not be used by users. + When `OwnData` is supported, this function will be removed. + Whether broadcasting is possible or not is not checked. + """ + + var a_shape = shape + a_shape[axis] = 1 + + var b_strides = NDArrayStrides( + a_shape + ) # Strides of b when refer to data of a + b_strides[axis] = 0 + + # Start broadcasting. + + var b = NDArray[dtype](shape) # Construct array of targeted shape. + + # Iterate all items in the new array and fill in correct values. + for offset in range(b.size): + var remainder = offset + var indices = Item(ndim=b.ndim, initialized=False) + + for i in range(b.ndim): + indices[i], remainder = divmod( + remainder, + b.strides[i], + ) + + (b._buf.ptr + offset).init_pointee_copy( + a._buf.ptr[_get_offset(indices, b_strides)] + ) + + return b^ + + # ===----------------------------------------------------------------------=== # # Rearranging elements # ===----------------------------------------------------------------------=== # diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 2bc4416c..2b7b17b1 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -1,3 +1,9 @@ +# ===----------------------------------------------------------------------=== # +# Distributed under the Apache 2.0 License with LLVM Exceptions. +# See LICENSE and the LLVM License for more information. +# https://github.com/Mojo-Numerics-and-Algorithms-group/NuMojo/blob/main/LICENSE +# https://llvm.org/LICENSE.txt +# ===----------------------------------------------------------------------=== # """ Averages and variances """ @@ -13,6 +19,7 @@ import numojo.core.matrix as matrix from numojo.core.matrix import Matrix from numojo.core.utility import bool_to_numeric from numojo.routines.logic.comparison import greater, less +from numojo.routines.manipulation import broadcast_to, _broadcast_back_to from numojo.routines.math.arithmetic import add from numojo.routines.math.sums import sum, cumsum import numojo.routines.math.misc as misc @@ -183,6 +190,10 @@ fn std[ """ Compute the standard deviation. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: An array. ddof: Delta degree of freedom. @@ -198,6 +209,48 @@ fn std[ return variance[returned_dtype](A, ddof=ddof) ** 0.5 +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ + returned_dtype +]: + """ + Computes the standard deviation along the axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + + Args: + A: An array. + axis: The axis along which the mean is performed. + ddof: Delta degree of freedom. + + Returns: + An array. + + Raises: + Error: If the axis is out of bounds. + Error: If ddof is not smaller than the size of the axis. + """ + + var normalized_axis = axis + if normalized_axis < 0: + normalized_axis += A.ndim + if (normalized_axis >= A.ndim) or (normalized_axis < 0): + raise Error(String("Axis {} out of bounds!").format(axis)) + + for i in range(A.ndim): + if ddof >= A.shape[i]: + raise Error( + String( + "ddof ({}) should be smaller than size ({}) of axis ({})" + ).format(ddof, A.shape[i], i) + ) + + return variance[returned_dtype](A, axis=normalized_axis, ddof=ddof) ** 0.5 + + fn std[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: @@ -270,6 +323,66 @@ fn variance[ ) / (A.size - ddof) +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ + returned_dtype +]: + """ + Computes the variance along the axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + + Args: + A: An array. + axis: The axis along which the mean is performed. + ddof: Delta degree of freedom. + + Returns: + An array. + + Raises: + Error: If the axis is out of bounds. + Error: If ddof is not smaller than the size of the axis. + """ + + var normalized_axis = axis + if normalized_axis < 0: + normalized_axis += A.ndim + if (normalized_axis >= A.ndim) or (normalized_axis < 0): + raise Error(String("Axis {} out of bounds!").format(axis)) + + for i in range(A.ndim): + if ddof >= A.shape[i]: + raise Error( + String( + "ddof ({}) should be smaller than size ({}) of axis ({})" + ).format(ddof, A.shape[i], i) + ) + + return sum( + ( + A.astype[returned_dtype]() + - _broadcast_back_to( + mean[returned_dtype](A, axis=normalized_axis), + A.shape, + axis=normalized_axis, + ) + ) + * ( + A.astype[returned_dtype]() + - _broadcast_back_to( + mean[returned_dtype](A, axis=normalized_axis), + A.shape, + axis=normalized_axis, + ) + ), + axis=normalized_axis, + ) / (A.shape[normalized_axis] - ddof) + + fn variance[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: diff --git a/tests/routines/test_statistics.mojo b/tests/routines/test_statistics.mojo index abf39f39..097533ed 100644 --- a/tests/routines/test_statistics.mojo +++ b/tests/routines/test_statistics.mojo @@ -12,14 +12,14 @@ from utils_for_test import check, check_is_close def test_mean_median_var_std(): var np = Python.import_module("numpy") - var A = nm.random.randn(10, 10) + var A = nm.random.randn(3, 4, 5) var Anp = A.to_numpy() assert_true( np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), "`mean` is broken", ) - for i in range(2): + for i in range(3): check_is_close( nm.mean(A, i), np.mean(Anp, i), @@ -35,8 +35,20 @@ def test_mean_median_var_std(): np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)), "`variance` is broken", ) + for i in range(3): + check_is_close( + nm.variance(A, i), + np.`var`(Anp, i), + String("`variance` is broken for {}-dimension".format(i)), + ) assert_true( np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)), "`std` is broken", ) + for i in range(3): + check_is_close( + nm.std(A, i), + np.std(Anp, i), + String("`std` is broken for {}-dimension".format(i)), + )