diff --git a/mojoproject.toml b/mojoproject.toml index d1c58429..03e98ec6 100644 --- a/mojoproject.toml +++ b/mojoproject.toml @@ -30,9 +30,10 @@ t = "clear && magic run test" # run individual tests to avoid overheat test_core = "magic run mojo test tests/core -I ./ -I ./tests/" test_creation = "magic run mojo test tests/routines/test_creation.mojo -I ./ -I ./tests/" -test_random = "magic run mojo test tests/routines/test_random.mojo -I ./ -I ./tests/" -test_manipulation = "magic run mojo test tests/routines/test_manipulation.mojo -I ./ -I ./tests/" test_linalg = "magic run mojo test tests/routines/test_linalg.mojo -I ./ -I ./tests/" +test_manipulation = "magic run mojo test tests/routines/test_manipulation.mojo -I ./ -I ./tests/" +test_random = "magic run mojo test tests/routines/test_random.mojo -I ./ -I ./tests/" +test_statistics = "magic run mojo test tests/routines/test_statistics.mojo -I ./ -I ./tests/" # run all final checks before a commit final = "magic run test && magic run format && magic run package" diff --git a/numojo/__init__.mojo b/numojo/__init__.mojo index a78ecbe0..de10f1a8 100644 --- a/numojo/__init__.mojo +++ b/numojo/__init__.mojo @@ -147,16 +147,10 @@ from numojo.routines.math import ( from numojo.routines import statistics from numojo.routines.statistics import ( mean, - meanall, - cummean, mode, median, variance, std, - cumpvariance, - cumvariance, - cumpstdev, - cumstdev, ) from numojo.routines import bitwise diff --git a/numojo/core/complex/complex_ndarray.mojo b/numojo/core/complex/complex_ndarray.mojo index 4daaba34..65b1335b 100644 --- a/numojo/core/complex/complex_ndarray.mojo +++ b/numojo/core/complex/complex_ndarray.mojo @@ -47,7 +47,7 @@ from numojo.routines.math.extrema import maxT, minT from numojo.routines.math.products import prod, cumprod from numojo.routines.math.sums import sum, cumsum import numojo.routines.sorting as sorting -from numojo.routines.statistics.averages import mean, cummean +from numojo.routines.statistics.averages import mean # ===----------------------------------------------------------------------===# diff --git a/numojo/core/matrix.mojo b/numojo/core/matrix.mojo index e58f2013..13dceec6 100644 --- a/numojo/core/matrix.mojo +++ b/numojo/core/matrix.mojo @@ -1020,20 +1020,24 @@ struct Matrix[dtype: DType = DType.float64]( """ return numojo.math.extrema.max(self, axis=axis) - fn mean(self) raises -> Scalar[dtype]: + fn mean[ + returned_dtype: DType = DType.float64 + ](self) raises -> Scalar[returned_dtype]: """ Calculate the arithmetic average of all items in the Matrix. """ - return numojo.statistics.mean(self) + return numojo.statistics.mean[returned_dtype](self) - fn mean(self, axis: Int) raises -> Self: + fn mean[ + returned_dtype: DType = DType.float64 + ](self, axis: Int) raises -> Matrix[returned_dtype]: """ Calculate the arithmetic average of a Matrix along the axis. Args: axis: 0 or 1. """ - return numojo.statistics.mean(self, axis=axis) + return numojo.statistics.mean[returned_dtype](self, axis=axis) fn min(self) raises -> Scalar[dtype]: """ @@ -1103,16 +1107,20 @@ struct Matrix[dtype: DType = DType.float64]( fn round(self, decimals: Int) raises -> Self: return numojo.math.rounding.round(self, decimals=decimals) - fn std(self, ddof: Int = 0) raises -> Scalar[dtype]: + fn std[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the standard deviation. Args: ddof: Delta degree of freedom. """ - return numojo.statistics.std(self, ddof=ddof) + return numojo.statistics.std[returned_dtype](self, ddof=ddof) - fn std(self, axis: Int, ddof: Int = 0) raises -> Self: + fn std[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the standard deviation along axis. @@ -1120,7 +1128,7 @@ struct Matrix[dtype: DType = DType.float64]( axis: 0 or 1. ddof: Delta degree of freedom. """ - return numojo.statistics.std(self, axis=axis, ddof=ddof) + return numojo.statistics.std[returned_dtype](self, axis=axis, ddof=ddof) fn sum(self) -> Scalar[dtype]: """ @@ -1167,16 +1175,20 @@ struct Matrix[dtype: DType = DType.float64]( fn T(self) -> Self: return transpose(self) - fn variance(self, ddof: Int = 0) raises -> Scalar[dtype]: + fn variance[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the variance. Args: ddof: Delta degree of freedom. """ - return numojo.statistics.variance(self, ddof=ddof) + return numojo.statistics.variance[returned_dtype](self, ddof=ddof) - fn variance(self, axis: Int, ddof: Int = 0) raises -> Self: + fn variance[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the variance along axis. @@ -1184,7 +1196,9 @@ struct Matrix[dtype: DType = DType.float64]( axis: 0 or 1. ddof: Delta degree of freedom. """ - return numojo.statistics.variance(self, axis=axis, ddof=ddof) + return numojo.statistics.variance[returned_dtype]( + self, axis=axis, ddof=ddof + ) # ===-------------------------------------------------------------------===# # To other data types diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 87184074..103d6827 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -56,7 +56,7 @@ import numojo.routines.math.rounding as rounding import numojo.routines.bitwise as bitwise import numojo.routines.linalg as linalg from numojo.core.datatypes import TypeCoercion, _concise_dtype_str -from numojo.routines.statistics.averages import mean, cummean +from numojo.routines.statistics.averages import mean from numojo.routines.math.products import prod, cumprod from numojo.routines.math.sums import sum, cumsum from numojo.routines.logic.truth import any @@ -255,11 +255,15 @@ struct NDArray[dtype: DType = DType.float64]( @always_inline("nodebug") fn __del__(owned self): + """ + Destroys all elements in the list and free its memory. + """ var owndata = True try: owndata = self.flags["OWNDATA"] except: print("Invalid `OWNDATA` flag. Treat as `True`.") + if owndata: self._buf.ptr.free() @@ -1850,6 +1854,13 @@ struct NDArray[dtype: DType = DType.float64]( fn __pow__(self, p: Int) -> Self: return self._elementwise_pow(p) + fn __pow__(self, rhs: Scalar[dtype]) raises -> Self: + """Power of items.""" + var res = self + for i in range(self.size): + res._buf.ptr[i] = self._buf.ptr[i].__pow__(rhs) + return res^ + fn __pow__(self, p: Self) raises -> Self: if self.size != p.size: raise Error( @@ -3243,26 +3254,29 @@ struct NDArray[dtype: DType = DType.float64]( return result - fn mean(self: Self, axis: Int) raises -> Self: + fn mean[ + returned_dtype: DType = DType.float64 + ](self: Self, axis: Int) raises -> NDArray[returned_dtype]: """ Mean of array elements over a given axis. + Args: - array: NDArray. axis: The axis along which the mean is performed. + Returns: An NDArray. """ - return mean(self, axis) + return mean[returned_dtype](self, axis) fn mean(self) raises -> Scalar[dtype]: """ - Cumulative mean of a array. + Mean of a array. Returns: - The cumulative mean of the array as a SIMD Value of `dtype`. + The mean of the array as a SIMD Value of `dtype`. """ - return cummean[dtype](self) + return mean[dtype](self) # fn nonzero(self): # pass @@ -3409,6 +3423,25 @@ struct NDArray[dtype: DType = DType.float64]( var I = NDArray[DType.index](self.shape) sorting._sort_inplace(self, I, axis=axis) + fn std[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Compute the standard deviation. + + Args: + ddof: Delta degree of freedom. + """ + + if ddof >= self.size: + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, self.size + ) + ) + + return variance[returned_dtype](self, ddof=ddof) ** 0.5 + fn sum(self: Self) raises -> Scalar[dtype]: """ Sum of all array elements. @@ -3506,6 +3539,20 @@ struct NDArray[dtype: DType = DType.float64]( """ return self._buf.ptr + fn variance[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Returns the variance of array. + + Parameters: + returned_dtype: The returned data type, defaulting to float64. + + Args: + ddof: Delta degree of freedom. + """ + return variance[returned_dtype](self, ddof=ddof) + # ===----------------------------------------------------------------------===# # NDArrayIterator diff --git a/numojo/routines/math/sums.mojo b/numojo/routines/math/sums.mojo index 6d8eddda..526635d3 100644 --- a/numojo/routines/math/sums.mojo +++ b/numojo/routines/math/sums.mojo @@ -39,9 +39,7 @@ fn sum[dtype: DType](A: NDArray[dtype]) -> Scalar[dtype]: return res -fn sum[ - dtype: DType -](A: NDArray[dtype], owned axis: Int) raises -> NDArray[dtype]: +fn sum[dtype: DType](A: NDArray[dtype], axis: Int) raises -> NDArray[dtype]: """ Returns sums of array elements over a given axis. @@ -52,6 +50,10 @@ fn sum[ print(nm.sum(A, axis=0)) ``` + Raises: + Error: If the axis is out of bound. + Error: If the number of dimensions is 1. + Args: A: NDArray. axis: The axis along which the sum is performed. @@ -60,25 +62,35 @@ fn sum[ An NDArray. """ - if axis < 0: - axis += A.ndim - if (axis < 0) or (axis >= A.ndim): + var normalized_axis = axis + if normalized_axis < 0: + normalized_axis += A.ndim + + if (normalized_axis < 0) or (normalized_axis >= A.ndim): raise Error( - String("Invalid index: index out of bound [0, {}).").format(A.ndim) + String("Axis {} out of bound [0, {}).").format(axis, A.ndim) + ) + if A.ndim == 1: + raise Error( + String( + "`numojo.routines.math.sums.sum()`: " + "Cannot sum over axis for 1-d array. " + "Please remove the `axis` argument." + ) ) var result_shape: List[Int] = List[Int]() - var size_of_axis: Int = A.shape[axis] + var size_of_axis: Int = A.shape[normalized_axis] var slices: List[Slice] = List[Slice]() for i in range(A.ndim): - if i != axis: + if i != normalized_axis: result_shape.append(A.shape[i]) slices.append(Slice(0, A.shape[i])) else: slices.append(Slice(0, 0)) # Temp value var result = zeros[dtype](NDArrayShape(result_shape)) for i in range(size_of_axis): - slices[axis] = Slice(i, i + 1) + slices[normalized_axis] = Slice(i, i + 1) var arr_slice = A[slices] result += arr_slice diff --git a/numojo/routines/random.mojo b/numojo/routines/random.mojo index 366de311..97194094 100644 --- a/numojo/routines/random.mojo +++ b/numojo/routines/random.mojo @@ -323,18 +323,7 @@ fn randn[ a normal distribution with given mean and variance. """ - builtin_random.seed() - - var result: NDArray[dtype] = NDArray[dtype](shape) - - builtin_random.randn[dtype]( - ptr=result._buf.ptr, - size=result.size, - mean=mean.cast[DType.float64](), - variance=variance.cast[DType.float64](), - ) - - return result^ + return randn[dtype](shape) * mt.sqrt(variance) + mean fn randn[ diff --git a/numojo/routines/statistics/__init__.mojo b/numojo/routines/statistics/__init__.mojo index c025eb9d..253b83dd 100644 --- a/numojo/routines/statistics/__init__.mojo +++ b/numojo/routines/statistics/__init__.mojo @@ -1,15 +1,9 @@ from .averages import ( mean, - meanall, max, min, - cummean, mode, median, variance, std, - cumpvariance, - cumvariance, - cumpstdev, - cumstdev, ) diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 9ee064e6..2bc4416c 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -6,7 +6,7 @@ Averages and variances # ===----------------------------------------------------------------------=== # from collections.optional import Optional -from math import sqrt +import math as mt from numojo.core.ndarray import NDArray import numojo.core.matrix as matrix @@ -14,118 +14,110 @@ from numojo.core.matrix import Matrix from numojo.core.utility import bool_to_numeric from numojo.routines.logic.comparison import greater, less from numojo.routines.math.arithmetic import add -from numojo.routines.sorting import binary_sort from numojo.routines.math.sums import sum, cumsum import numojo.routines.math.misc as misc +from numojo.routines.sorting import sort fn mean[ - dtype: DType -](array: NDArray[dtype], axis: Int) raises -> NDArray[dtype]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: NDArray[dtype]) raises -> Scalar[returned_dtype]: """ - Mean of array elements over a given axis. + Calculate the arithmetic average of all items in the array. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: - array: NDArray. - axis: The axis along which the mean is performed. + a: NDArray. + Returns: - An NDArray. + A scalar defaulting to float64. """ - return sum(array, axis) / Scalar[array.dtype](array.shape[axis]) + return sum(a).cast[returned_dtype]() / a.size fn mean[ - dtype: DType, //, dtype_out: DType = DType.float64 -](a: NDArray[dtype]) raises -> Scalar[dtype_out]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: NDArray[dtype], axis: Int) raises -> NDArray[returned_dtype]: """ - Calculate the arithmetic average of all items in the array. + Mean of array elements over a given axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: a: NDArray. + axis: The axis along which the mean is performed. Returns: - A scalar. - + An NDArray. """ - return sum(a).cast[dtype_out]() / a.size - -fn mean[dtype: DType](A: Matrix[dtype]) -> Scalar[dtype]: - """ - Calculate the arithmetic average of all items in the Matrix. + var normalized_axis = axis - Args: - A: Matrix. - """ + if axis < 0: + normalized_axis += a.ndim - return sum(A) / A.size + if (normalized_axis < 0) or (normalized_axis >= a.ndim): + raise Error(String("Axis {} out of bounds!").format(axis)) + return ( + sum(a, axis=normalized_axis).astype[returned_dtype]() + / a.shape[normalized_axis] + ) -fn mean[dtype: DType](A: Matrix[dtype], axis: Int) raises -> Matrix[dtype]: - """ - Calculate the arithmetic average of a Matrix along the axis. - Args: - A: Matrix. - axis: 0 or 1. +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: Matrix[dtype]) -> Scalar[returned_dtype]: """ + Calculate the arithmetic average of all items in the Matrix. - if axis == 0: - return sum(A, axis=0) / A.shape[0] - elif axis == 1: - return sum(A, axis=1) / A.shape[1] - else: - raise Error(String("The axis can either be 1 or 0!")) - - -fn meanall(array: NDArray) raises -> Float64: - """Mean of all items in the array. - - Example: - ```console - > print(A) - [[ 0.1315377950668335 0.458650141954422 0.21895918250083923 ] - [ 0.67886471748352051 0.93469291925430298 0.51941639184951782 ] - [ 0.034572109580039978 0.52970021963119507 0.007698186207562685 ]] - 2-D array Shape: [3, 3] DType: float32 - - > print(nm.math.stats.meanall(A)) - 0.39045463667975533 - ``` + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: - array: NDArray. + a: A matrix. Returns: - Float64. + A scalar of the returned data type. """ - return ( - sum(array).cast[DType.float64]() - / Int32(array.size).cast[DType.float64]() - ) + return sum(a).cast[returned_dtype]() / a.size -fn cummean[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: - """Arithmatic mean of all items of an array. +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: Matrix[dtype], axis: Int) raises -> Matrix[returned_dtype]: + """ + Calculate the arithmetic average of a Matrix along the axis. Parameters: - dtype: The element type. + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: - array: An NDArray. + a: A matrix. + axis: The axis along which the mean is performed. Returns: - The mean of all of the member values of array as a SIMD Value of `dtype`. + A matrix of the returned data type. """ - return sum[dtype](array) / (array.num_elements()) + + if axis == 0: + return sum(a, axis=0).astype[returned_dtype]() / a.shape[0] + elif axis == 1: + return sum(a, axis=1).astype[returned_dtype]() / a.shape[1] + else: + raise Error(String("The axis can either be 1 or 0!")) -fn mode[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: +fn mode[dtype: DType](array: NDArray[dtype]) raises -> Scalar[dtype]: """Mode of all items of an array. Parameters: @@ -137,7 +129,8 @@ fn mode[ Returns: The mode of all of the member values of array as a SIMD Value of `dtype`. """ - var sorted_array: NDArray[dtype] = binary_sort[dtype](array) + + var sorted_array: NDArray[dtype] = sort(array) var max_count = 0 var mode_value = sorted_array.item(0) var current_count = 1 @@ -157,14 +150,15 @@ fn mode[ return mode_value -# * IMPLEMENT median high and low fn median[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: - """Median value of all items of an array. + dtype: DType, //, returned_dtype: DType = DType.float64 +](array: NDArray[dtype]) raises -> Scalar[returned_dtype]: + """ + Median value of all items of an array. Parameters: dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: array: An NDArray. @@ -172,49 +166,47 @@ fn median[ Returns: The median of all of the member values of array as a SIMD Value of `dtype`. """ - var sorted_array = binary_sort[dtype](array) - var n = array.num_elements() - if n % 2 == 1: - return sorted_array.item(n // 2) + var sorted_array = sort(array) + + if array.size % 2 == 1: + return sorted_array.item(array.size // 2).cast[returned_dtype]() else: - return (sorted_array.item(n // 2 - 1) + sorted_array.item(n // 2)) / 2 + return ( + sorted_array.item(array.size // 2 - 1) + + sorted_array.item(array.size // 2) + ).cast[returned_dtype]() / 2 -fn std[dtype: DType](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[dtype]: +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the standard deviation. Args: - A: Matrix. + A: An array. ddof: Delta degree of freedom. """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) - return variance(A, ddof=ddof) ** 0.5 + return variance[returned_dtype](A, ddof=ddof) ** 0.5 fn std[ - dtype: DType -](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[dtype]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ - Compute the standard deviation along axis. - - Args: - A: Matrix. - axis: 0 or 1. - ddof: Delta degree of freedom. - """ - - return variance(A, axis, ddof=ddof) ** 0.5 - + Compute the standard deviation. -fn variance[ - dtype: DType -](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[dtype]: - """ - Compute the variance. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: A: Matrix. @@ -222,16 +214,24 @@ fn variance[ """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) - return sum((A - mean(A)) * (A - mean(A))) / (A.size - ddof) + return variance[returned_dtype](A, ddof=ddof) ** 0.5 -fn variance[ - dtype: DType -](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[dtype]: +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ - Compute the variance along axis. + Compute the standard deviation along axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: A: Matrix. @@ -239,125 +239,99 @@ fn variance[ ddof: Delta degree of freedom. """ - if (ddof >= A.shape[0]) or (ddof >= A.shape[1]): - raise Error( - String( - "ddof {ddof} should be smaller than size" - " {A.shape[0]}x{A.shape[1]}" - ) - ) - - if axis == 0: - return sum((A - mean(A, axis=0)) * (A - mean(A, axis=0)), axis=0) / ( - A.shape[0] - ddof - ) - elif axis == 1: - return sum((A - mean(A, axis=1)) * (A - mean(A, axis=1)), axis=1) / ( - A.shape[1] - ddof - ) - else: - raise Error(String("The axis can either be 1 or 0!")) + return variance[returned_dtype](A, axis, ddof=ddof) ** 0.5 -fn cumpvariance[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ - Population variance of a array. + Compute the variance. Parameters: - dtype: The element type.. + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: - array: A NDArray. - mu: The mean of the array, if provided. - Returns: - The variance of all of the member values of array as a SIMD Value of `dtype`. + A: An array. + ddof: Delta degree of freedom. """ - var mean_value: Scalar[dtype] - if not mu: - mean_value = cummean[dtype](array) - else: - mean_value = mu.value() - - var result = Scalar[dtype]() - for i in range(array.num_elements()): - result += (array.load(i) - mean_value) ** 2 + if ddof >= A.size: + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) - return sqrt(result / (array.num_elements())) + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A)) + ) / (A.size - ddof) -fn cumvariance[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ - Variance of a array. + Compute the variance. Parameters: - dtype: The element type. + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: - array: A NDArray. - mu: The mean of the array, if provided. - - Returns: - The variance of all of the member values of array as a SIMD Value of `dtype`. + A: Matrix. + ddof: Delta degree of freedom. """ - var mean_value: Scalar[dtype] - - if not mu: - mean_value = cummean[dtype](array) - else: - mean_value = mu.value() - var result = Scalar[dtype]() - for i in range(array.num_elements()): - result += (array.load(i) - mean_value) ** 2 + if ddof >= A.size: + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) - return sqrt(result / (array.num_elements() - 1)) + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A)) + ) / (A.size - ddof) -fn cumpstdev[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ - Population standard deviation of a array. + Compute the variance along axis. Parameters: - dtype: The element type. + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: - array: A NDArray. - mu: The mean of the array, if provided. - - Returns: - The standard deviation of all of the member values of array as a SIMD Value of `dtype`. - """ - return sqrt(cumpvariance[dtype](array, mu)) - - -fn cumstdev[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: + A: Matrix. + axis: 0 or 1. + ddof: Delta degree of freedom. """ - Standard deviation of a array. - Parameters: - dtype: The element type. + if (ddof >= A.shape[0]) or (ddof >= A.shape[1]): + raise Error( + String("ddof {} should be smaller than size {}x{}").format( + ddof, A.shape[0], A.shape[1] + ) + ) - Args: - array: A NDArray. - mu: The mean of the array, if provided. - Returns: - The standard deviation of all of the member values of array as a SIMD Value of `dtype`. - """ - return sqrt(cumvariance[dtype](array, mu)) + if axis == 0: + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)), + axis=0, + ) / (A.shape[0] - ddof) + elif axis == 1: + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)), + axis=1, + ) / (A.shape[1] - ddof) + else: + raise Error(String("The axis can either be 1 or 0!")) diff --git a/tests/core/test_matrix.mojo b/tests/core/test_matrix.mojo index e866cbf4..342bbd24 100644 --- a/tests/core/test_matrix.mojo +++ b/tests/core/test_matrix.mojo @@ -316,50 +316,6 @@ def test_hyperbolic(): check_matrices_close(nm.atanh(B), np.arctanh(Bnp), "atanh is broken") -# ===-----------------------------------------------------------------------===# -# Statistics -# ===-----------------------------------------------------------------------===# - - -def test_statistics(): - var np = Python.import_module("numpy") - var A = Matrix.rand[f64]((100, 100)) - var Anp = np.matrix(A.to_numpy()) - - assert_true( - np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), - "`mean` is broken", - ) - for i in range(2): - check_matrices_close( - nm.mean(A, i), - np.mean(Anp, i), - String("`mean` is broken for {i}-dimension"), - ) - - assert_true( - np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)), - "`variance` is broken", - ) - for i in range(2): - check_matrices_close( - nm.variance(A, i), - np.`var`(Anp, i), - String("`variance` is broken for {i}-dimension"), - ) - - assert_true( - np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)), - "`std` is broken", - ) - for i in range(2): - check_matrices_close( - nm.std(A, i), - np.std(Anp, i), - String("`std` is broken for {i}-dimension"), - ) - - def test_sorting(): var np = Python.import_module("numpy") var A = Matrix.rand[f64]((10, 10)) diff --git a/tests/routines/test_random.mojo b/tests/routines/test_random.mojo index 786b1d10..9c741000 100644 --- a/tests/routines/test_random.mojo +++ b/tests/routines/test_random.mojo @@ -1,3 +1,4 @@ +from math import sqrt import numojo as nm from numojo.prelude import * from python import Python, PythonObject @@ -17,8 +18,8 @@ def test_randminmax(): """Test random array generation with min and max values.""" var arr_variadic = nm.random.rand[nm.f64](10, 10, 10, min=1, max=2) var arr_list = nm.random.rand[nm.f64](List[Int](10, 10, 10), min=3, max=4) - var arr_variadic_mean = nm.cummean(arr_variadic) - var arr_list_mean = nm.cummean(arr_list) + var arr_variadic_mean = nm.mean(arr_variadic) + var arr_list_mean = nm.mean(arr_list) assert_almost_equal( arr_variadic_mean, 1.5, @@ -55,22 +56,18 @@ def test_randint(): def test_randn(): """Test random array generation with normal distribution.""" - var arr_variadic_01 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=0.0, variance=1.0 - ) + var arr_variadic_01 = nm.random.randn[nm.f64](20, 20, 20) var arr_variadic_31 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=3.0, variance=1.0 - ) - var arr_variadic_12 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=1.0, variance=3.0 + Shape(20, 20, 20), mean=3, variance=1 ) + var arr_variadic_12 = nm.random.randn[nm.f64](Shape(20, 20, 20), 1, 2) - var arr_variadic_mean01 = nm.cummean(arr_variadic_01) - var arr_variadic_mean31 = nm.cummean(arr_variadic_31) - var arr_variadic_mean12 = nm.cummean(arr_variadic_12) - var arr_variadic_var01 = nm.cumvariance(arr_variadic_01) - var arr_variadic_var31 = nm.cumvariance(arr_variadic_31) - var arr_variadic_var12 = nm.cumvariance(arr_variadic_12) + var arr_variadic_mean01 = nm.mean(arr_variadic_01) + var arr_variadic_mean31 = nm.mean(arr_variadic_31) + var arr_variadic_mean12 = nm.mean(arr_variadic_12) + var arr_variadic_var01 = nm.variance(arr_variadic_01) + var arr_variadic_var31 = nm.variance(arr_variadic_31) + var arr_variadic_var12 = nm.variance(arr_variadic_12) assert_almost_equal( arr_variadic_mean01, @@ -105,7 +102,7 @@ def test_randn(): ) assert_almost_equal( arr_variadic_var12, - 3, + 2, msg="Variance of random array with mean 1 and variance 2", atol=0.1, ) @@ -113,22 +110,16 @@ def test_randn(): def test_randn_list(): """Test random array generation with normal distribution.""" - var arr_list_01 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=0.0, variance=1.0 - ) - var arr_list_31 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=3.0, variance=1.0 - ) - var arr_list_12 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=1.0, variance=2.0 - ) + var arr_list_01 = nm.random.randn[nm.f64](Shape(20, 20, 20)) + var arr_list_31 = nm.random.randn[nm.f64](Shape(20, 20, 20)) + 3 + var arr_list_12 = nm.random.randn[nm.f64](Shape(20, 20, 20)) * sqrt(2.0) + 1 - var arr_list_mean01 = nm.cummean(arr_list_01) - var arr_list_mean31 = nm.cummean(arr_list_31) - var arr_list_mean12 = nm.cummean(arr_list_12) - var arr_list_var01 = nm.cumvariance(arr_list_01) - var arr_list_var31 = nm.cumvariance(arr_list_31) - var arr_list_var12 = nm.cumvariance(arr_list_12) + var arr_list_mean01 = nm.mean(arr_list_01) + var arr_list_mean31 = nm.mean(arr_list_31) + var arr_list_mean12 = nm.mean(arr_list_12) + var arr_list_var01 = nm.variance(arr_list_01) + var arr_list_var31 = nm.variance(arr_list_31) + var arr_list_var12 = nm.variance(arr_list_12) assert_almost_equal( arr_list_mean01, @@ -178,8 +169,8 @@ def test_rand_exponential(): List[Int](20, 20, 20), scale=0.5 ) - var arr_variadic_mean = nm.cummean(arr_variadic) - var arr_list_mean = nm.cummean(arr_list) + var arr_variadic_mean = nm.mean(arr_variadic) + var arr_list_mean = nm.mean(arr_list) # For exponential distribution, mean = 1 / rate assert_almost_equal( @@ -196,18 +187,18 @@ def test_rand_exponential(): ) # For exponential distribution, variance = 1 / (rate^2) - var arr_variadic_var = nm.cumvariance(arr_variadic) - var arr_list_var = nm.cumvariance(arr_list) + var arr_variadic_var = nm.variance(arr_variadic) + var arr_list_var = nm.variance(arr_list) assert_almost_equal( arr_variadic_var, - 1 / (2), + 1 / 2**2, msg="Variance of exponential distribution with rate 2.0", atol=0.1, ) assert_almost_equal( arr_list_var, - 1 / (0.5), + 1 / 0.5**2, msg="Variance of exponential distribution with rate 0.5", atol=0.5, ) diff --git a/tests/routines/test_statistics.mojo b/tests/routines/test_statistics.mojo new file mode 100644 index 00000000..abf39f39 --- /dev/null +++ b/tests/routines/test_statistics.mojo @@ -0,0 +1,42 @@ +import numojo as nm +from numojo.prelude import * +from numojo.core.matrix import Matrix +from python import Python, PythonObject +from testing.testing import assert_raises, assert_true +from utils_for_test import check, check_is_close + +# ===-----------------------------------------------------------------------===# +# Statistics +# ===-----------------------------------------------------------------------===# + + +def test_mean_median_var_std(): + var np = Python.import_module("numpy") + var A = nm.random.randn(10, 10) + var Anp = A.to_numpy() + + assert_true( + np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), + "`mean` is broken", + ) + for i in range(2): + check_is_close( + nm.mean(A, i), + np.mean(Anp, i), + String("`mean` is broken for {}-dimension".format(i)), + ) + + assert_true( + np.all(np.isclose(nm.median(A), np.median(Anp), atol=0.1)), + "`median` is broken", + ) + + assert_true( + np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)), + "`variance` is broken", + ) + + assert_true( + np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)), + "`std` is broken", + )