From c25ba0e588256efcea4bec65a8cab4365fdb5bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 13:22:42 +0100 Subject: [PATCH 01/13] Update __del__ docstring --- numojo/core/ndarray.mojo | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 87184074..1cc1055c 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -255,11 +255,15 @@ struct NDArray[dtype: DType = DType.float64]( @always_inline("nodebug") fn __del__(owned self): + """ + Destroys all elements in the list and free its memory. + """ var owndata = True try: owndata = self.flags["OWNDATA"] except: print("Invalid `OWNDATA` flag. Treat as `True`.") + if owndata: self._buf.ptr.free() From 8547335a76ad444ccdc20e7d498542f7167fc6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 13:44:29 +0100 Subject: [PATCH 02/13] Add `mean` that returns scalar. Remove `meanall`. --- numojo/routines/statistics/averages.mojo | 57 ++++++++++-------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 401f177f..54149c70 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -6,7 +6,7 @@ Averages and variances # ===----------------------------------------------------------------------=== # from collections.optional import Optional -from math import sqrt +import math as mt from numojo.core.ndarray import NDArray import numojo.core.matrix as matrix @@ -19,6 +19,25 @@ from numojo.routines.math.sums import sum, cumsum import numojo.routines.math.misc as misc +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: NDArray[dtype]) raises -> Scalar[returned_dtype]: + """ + Calculate the arithmetic average of all items in the array. + + parameters: + returned_dtype: The returned data type, defaulting to float64. + + Args: + a: NDArray. + + Returns: + A scalar defaulting to float64. + + """ + return sum(a).cast[returned_dtype]() / a.size + + fn mean(array: NDArray, axis: Int = 0) raises -> NDArray[array.dtype]: """ Mean of array elements over a given axis. @@ -60,34 +79,6 @@ fn mean[dtype: DType](A: Matrix[dtype], axis: Int) raises -> Matrix[dtype]: raise Error(String("The axis can either be 1 or 0!")) -fn meanall(array: NDArray) raises -> Float64: - """Mean of all items in the array. - - Example: - ```console - > print(A) - [[ 0.1315377950668335 0.458650141954422 0.21895918250083923 ] - [ 0.67886471748352051 0.93469291925430298 0.51941639184951782 ] - [ 0.034572109580039978 0.52970021963119507 0.007698186207562685 ]] - 2-D array Shape: [3, 3] DType: float32 - - > print(nm.math.stats.meanall(A)) - 0.39045463667975533 - ``` - - Args: - array: NDArray. - - Returns: - Float64. - """ - - return ( - sum(array).cast[DType.float64]() - / Int32(array.size).cast[DType.float64]() - ) - - fn cummean[ dtype: DType = DType.float64 ](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: @@ -269,7 +260,7 @@ fn cumpvariance[ for i in range(array.num_elements()): result += (array.load(i) - mean_value) ** 2 - return sqrt(result / (array.num_elements())) + return mt.sqrt(result / (array.num_elements())) fn cumvariance[ @@ -301,7 +292,7 @@ fn cumvariance[ for i in range(array.num_elements()): result += (array.load(i) - mean_value) ** 2 - return sqrt(result / (array.num_elements() - 1)) + return mt.sqrt(result / (array.num_elements() - 1)) fn cumpstdev[ @@ -322,7 +313,7 @@ fn cumpstdev[ Returns: The standard deviation of all of the member values of array as a SIMD Value of `dtype`. """ - return sqrt(cumpvariance[dtype](array, mu)) + return mt.sqrt(cumpvariance[dtype](array, mu)) fn cumstdev[ @@ -342,4 +333,4 @@ fn cumstdev[ Returns: The standard deviation of all of the member values of array as a SIMD Value of `dtype`. """ - return sqrt(cumvariance[dtype](array, mu)) + return mt.sqrt(cumvariance[dtype](array, mu)) From bf8c63ac9d9f64954f712b062f156326578bb242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 13:44:42 +0100 Subject: [PATCH 03/13] Remove `cummean` --- numojo/routines/statistics/averages.mojo | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 54149c70..4f88a874 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -79,23 +79,6 @@ fn mean[dtype: DType](A: Matrix[dtype], axis: Int) raises -> Matrix[dtype]: raise Error(String("The axis can either be 1 or 0!")) -fn cummean[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: - """Arithmatic mean of all items of an array. - - Parameters: - dtype: The element type. - - Args: - array: An NDArray. - - Returns: - The mean of all of the member values of array as a SIMD Value of `dtype`. - """ - return sum[dtype](array) / (array.num_elements()) - - fn mode[ dtype: DType = DType.float64 ](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: From 76d3b1cbedb967a4acac7bf21e17c695966dbacc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 16:56:01 +0100 Subject: [PATCH 04/13] Define returned dtype for other statistic functions --- numojo/__init__.mojo | 2 - numojo/core/complex/complex_ndarray.mojo | 2 +- numojo/core/matrix.mojo | 38 +++++--- numojo/core/ndarray.mojo | 17 ++-- numojo/routines/statistics/__init__.mojo | 2 - numojo/routines/statistics/averages.mojo | 110 ++++++++++++++++------- tests/routines/test_random.mojo | 20 ++--- 7 files changed, 126 insertions(+), 65 deletions(-) diff --git a/numojo/__init__.mojo b/numojo/__init__.mojo index a78ecbe0..a4ee84d6 100644 --- a/numojo/__init__.mojo +++ b/numojo/__init__.mojo @@ -147,8 +147,6 @@ from numojo.routines.math import ( from numojo.routines import statistics from numojo.routines.statistics import ( mean, - meanall, - cummean, mode, median, variance, diff --git a/numojo/core/complex/complex_ndarray.mojo b/numojo/core/complex/complex_ndarray.mojo index 4daaba34..65b1335b 100644 --- a/numojo/core/complex/complex_ndarray.mojo +++ b/numojo/core/complex/complex_ndarray.mojo @@ -47,7 +47,7 @@ from numojo.routines.math.extrema import maxT, minT from numojo.routines.math.products import prod, cumprod from numojo.routines.math.sums import sum, cumsum import numojo.routines.sorting as sorting -from numojo.routines.statistics.averages import mean, cummean +from numojo.routines.statistics.averages import mean # ===----------------------------------------------------------------------===# diff --git a/numojo/core/matrix.mojo b/numojo/core/matrix.mojo index e58f2013..13dceec6 100644 --- a/numojo/core/matrix.mojo +++ b/numojo/core/matrix.mojo @@ -1020,20 +1020,24 @@ struct Matrix[dtype: DType = DType.float64]( """ return numojo.math.extrema.max(self, axis=axis) - fn mean(self) raises -> Scalar[dtype]: + fn mean[ + returned_dtype: DType = DType.float64 + ](self) raises -> Scalar[returned_dtype]: """ Calculate the arithmetic average of all items in the Matrix. """ - return numojo.statistics.mean(self) + return numojo.statistics.mean[returned_dtype](self) - fn mean(self, axis: Int) raises -> Self: + fn mean[ + returned_dtype: DType = DType.float64 + ](self, axis: Int) raises -> Matrix[returned_dtype]: """ Calculate the arithmetic average of a Matrix along the axis. Args: axis: 0 or 1. """ - return numojo.statistics.mean(self, axis=axis) + return numojo.statistics.mean[returned_dtype](self, axis=axis) fn min(self) raises -> Scalar[dtype]: """ @@ -1103,16 +1107,20 @@ struct Matrix[dtype: DType = DType.float64]( fn round(self, decimals: Int) raises -> Self: return numojo.math.rounding.round(self, decimals=decimals) - fn std(self, ddof: Int = 0) raises -> Scalar[dtype]: + fn std[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the standard deviation. Args: ddof: Delta degree of freedom. """ - return numojo.statistics.std(self, ddof=ddof) + return numojo.statistics.std[returned_dtype](self, ddof=ddof) - fn std(self, axis: Int, ddof: Int = 0) raises -> Self: + fn std[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the standard deviation along axis. @@ -1120,7 +1128,7 @@ struct Matrix[dtype: DType = DType.float64]( axis: 0 or 1. ddof: Delta degree of freedom. """ - return numojo.statistics.std(self, axis=axis, ddof=ddof) + return numojo.statistics.std[returned_dtype](self, axis=axis, ddof=ddof) fn sum(self) -> Scalar[dtype]: """ @@ -1167,16 +1175,20 @@ struct Matrix[dtype: DType = DType.float64]( fn T(self) -> Self: return transpose(self) - fn variance(self, ddof: Int = 0) raises -> Scalar[dtype]: + fn variance[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the variance. Args: ddof: Delta degree of freedom. """ - return numojo.statistics.variance(self, ddof=ddof) + return numojo.statistics.variance[returned_dtype](self, ddof=ddof) - fn variance(self, axis: Int, ddof: Int = 0) raises -> Self: + fn variance[ + returned_dtype: DType = DType.float64 + ](self, axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the variance along axis. @@ -1184,7 +1196,9 @@ struct Matrix[dtype: DType = DType.float64]( axis: 0 or 1. ddof: Delta degree of freedom. """ - return numojo.statistics.variance(self, axis=axis, ddof=ddof) + return numojo.statistics.variance[returned_dtype]( + self, axis=axis, ddof=ddof + ) # ===-------------------------------------------------------------------===# # To other data types diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 1cc1055c..0f091749 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -56,7 +56,7 @@ import numojo.routines.math.rounding as rounding import numojo.routines.bitwise as bitwise import numojo.routines.linalg as linalg from numojo.core.datatypes import TypeCoercion, _concise_dtype_str -from numojo.routines.statistics.averages import mean, cummean +from numojo.routines.statistics.averages import mean from numojo.routines.math.products import prod, cumprod from numojo.routines.math.sums import sum, cumsum from numojo.routines.logic.truth import any @@ -3247,26 +3247,29 @@ struct NDArray[dtype: DType = DType.float64]( return result - fn mean(self: Self, axis: Int) raises -> Self: + fn mean[ + returned_dtype: DType = DType.float64 + ](self: Self, axis: Int) raises -> NDArray[returned_dtype]: """ Mean of array elements over a given axis. + Args: - array: NDArray. axis: The axis along which the mean is performed. + Returns: An NDArray. """ - return mean(self, axis) + return mean[returned_dtype](self, axis) fn mean(self) raises -> Scalar[dtype]: """ - Cumulative mean of a array. + Mean of a array. Returns: - The cumulative mean of the array as a SIMD Value of `dtype`. + The mean of the array as a SIMD Value of `dtype`. """ - return cummean[dtype](self) + return mean[dtype](self) # fn nonzero(self): # pass diff --git a/numojo/routines/statistics/__init__.mojo b/numojo/routines/statistics/__init__.mojo index c025eb9d..df6d0007 100644 --- a/numojo/routines/statistics/__init__.mojo +++ b/numojo/routines/statistics/__init__.mojo @@ -1,9 +1,7 @@ from .averages import ( mean, - meanall, max, min, - cummean, mode, median, variance, diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 4f88a874..e332b287 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -25,7 +25,8 @@ fn mean[ """ Calculate the arithmetic average of all items in the array. - parameters: + Parameters: + dtype: The element type. returned_dtype: The returned data type, defaulting to float64. Args: @@ -38,43 +39,81 @@ fn mean[ return sum(a).cast[returned_dtype]() / a.size -fn mean(array: NDArray, axis: Int = 0) raises -> NDArray[array.dtype]: +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: NDArray[dtype], axis: Int) raises -> NDArray[returned_dtype]: """ Mean of array elements over a given axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: - array: NDArray. + a: NDArray. axis: The axis along which the mean is performed. + Returns: An NDArray. - """ - return sum(array, axis) / Scalar[array.dtype](array.shape[axis]) + var normalized_axis = axis -fn mean[dtype: DType](A: Matrix[dtype]) -> Scalar[dtype]: + if axis < 0: + normalized_axis = axis + a.ndim + + if normalized_axis < 0 or normalized_axis >= a.ndim: + raise Error(String("Axis {} out of bounds!").format(axis)) + + return ( + sum(a, axis=normalized_axis).astype[returned_dtype]() + / a.shape[normalized_axis] + ) + + +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: Matrix[dtype]) -> Scalar[returned_dtype]: """ Calculate the arithmetic average of all items in the Matrix. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: - A: Matrix. + a: A matrix. + + Returns: + A scalar of the returned data type. """ - return sum(A) / A.size + return sum(a).cast[returned_dtype]() / a.size -fn mean[dtype: DType](A: Matrix[dtype], axis: Int) raises -> Matrix[dtype]: +fn mean[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](a: Matrix[dtype], axis: Int) raises -> Matrix[returned_dtype]: """ Calculate the arithmetic average of a Matrix along the axis. + + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: - A: Matrix. - axis: 0 or 1. + a: A matrix. + axis: The axis along which the mean is performed. + + Returns: + A matrix of the returned data type. """ if axis == 0: - return sum(A, axis=0) / A.shape[0] + return sum(a, axis=0).astype[returned_dtype]() / a.shape[0] elif axis == 1: - return sum(A, axis=1) / A.shape[1] + return sum(a, axis=1).astype[returned_dtype]() / a.shape[1] else: raise Error(String("The axis can either be 1 or 0!")) @@ -136,7 +175,9 @@ fn median[ return (sorted_array.item(n // 2 - 1) + sorted_array.item(n // 2)) / 2 -fn std[dtype: DType](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[dtype]: +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the standard deviation. @@ -148,12 +189,12 @@ fn std[dtype: DType](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[dtype]: if ddof >= A.size: raise Error(String("ddof {ddof} should be smaller than size {A.size}")) - return variance(A, ddof=ddof) ** 0.5 + return variance[returned_dtype](A, ddof=ddof) ** 0.5 fn std[ - dtype: DType -](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[dtype]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the standard deviation along axis. @@ -163,12 +204,12 @@ fn std[ ddof: Delta degree of freedom. """ - return variance(A, axis, ddof=ddof) ** 0.5 + return variance[returned_dtype](A, axis, ddof=ddof) ** 0.5 fn variance[ - dtype: DType -](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[dtype]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: """ Compute the variance. @@ -180,12 +221,15 @@ fn variance[ if ddof >= A.size: raise Error(String("ddof {ddof} should be smaller than size {A.size}")) - return sum((A - mean(A)) * (A - mean(A))) / (A.size - ddof) + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A)) + ) / (A.size - ddof) fn variance[ - dtype: DType -](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[dtype]: + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: Matrix[dtype], axis: Int, ddof: Int = 0) raises -> Matrix[returned_dtype]: """ Compute the variance along axis. @@ -204,13 +248,17 @@ fn variance[ ) if axis == 0: - return sum((A - mean(A, axis=0)) * (A - mean(A, axis=0)), axis=0) / ( - A.shape[0] - ddof - ) + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)), + axis=0, + ) / (A.shape[0] - ddof) elif axis == 1: - return sum((A - mean(A, axis=1)) * (A - mean(A, axis=1)), axis=1) / ( - A.shape[1] - ddof - ) + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)), + axis=1, + ) / (A.shape[1] - ddof) else: raise Error(String("The axis can either be 1 or 0!")) @@ -234,7 +282,7 @@ fn cumpvariance[ """ var mean_value: Scalar[dtype] if not mu: - mean_value = cummean[dtype](array) + mean_value = mean[dtype](array) else: mean_value = mu.value() @@ -267,7 +315,7 @@ fn cumvariance[ var mean_value: Scalar[dtype] if not mu: - mean_value = cummean[dtype](array) + mean_value = mean[dtype](array) else: mean_value = mu.value() diff --git a/tests/routines/test_random.mojo b/tests/routines/test_random.mojo index e77202c8..963714a0 100644 --- a/tests/routines/test_random.mojo +++ b/tests/routines/test_random.mojo @@ -16,8 +16,8 @@ def test_randminmax(): """Test random array generation with min and max values.""" var arr_variadic = nm.random.rand[nm.f64](10, 10, 10, min=1, max=2) var arr_list = nm.random.rand[nm.f64](List[Int](10, 10, 10), min=3, max=4) - var arr_variadic_mean = nm.cummean(arr_variadic) - var arr_list_mean = nm.cummean(arr_list) + var arr_variadic_mean = nm.mean(arr_variadic) + var arr_list_mean = nm.mean(arr_list) assert_almost_equal( arr_variadic_mean, 1.5, @@ -44,9 +44,9 @@ def test_randn(): 20, 20, 20, mean=1.0, variance=3.0 ) - var arr_variadic_mean01 = nm.cummean(arr_variadic_01) - var arr_variadic_mean31 = nm.cummean(arr_variadic_31) - var arr_variadic_mean12 = nm.cummean(arr_variadic_12) + var arr_variadic_mean01 = nm.mean(arr_variadic_01) + var arr_variadic_mean31 = nm.mean(arr_variadic_31) + var arr_variadic_mean12 = nm.mean(arr_variadic_12) var arr_variadic_var01 = nm.cumvariance(arr_variadic_01) var arr_variadic_var31 = nm.cumvariance(arr_variadic_31) var arr_variadic_var12 = nm.cumvariance(arr_variadic_12) @@ -102,9 +102,9 @@ def test_randn_list(): List[Int](20, 20, 20), mean=1.0, variance=2.0 ) - var arr_list_mean01 = nm.cummean(arr_list_01) - var arr_list_mean31 = nm.cummean(arr_list_31) - var arr_list_mean12 = nm.cummean(arr_list_12) + var arr_list_mean01 = nm.mean(arr_list_01) + var arr_list_mean31 = nm.mean(arr_list_31) + var arr_list_mean12 = nm.mean(arr_list_12) var arr_list_var01 = nm.cumvariance(arr_list_01) var arr_list_var31 = nm.cumvariance(arr_list_31) var arr_list_var12 = nm.cumvariance(arr_list_12) @@ -155,8 +155,8 @@ def test_rand_exponential(): List[Int](20, 20, 20), rate=0.5 ) - var arr_variadic_mean = nm.cummean(arr_variadic) - var arr_list_mean = nm.cummean(arr_list) + var arr_variadic_mean = nm.mean(arr_variadic) + var arr_list_mean = nm.mean(arr_list) # For exponential distribution, mean = 1 / rate assert_almost_equal( From 4bbcaf6c7ebaa15cac74e817098a6f4eef3c4dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 17:14:27 +0100 Subject: [PATCH 05/13] Update mode and median --- mojoproject.toml | 3 +- numojo/routines/statistics/averages.mojo | 31 +++++++++-------- tests/core/test_matrix.mojo | 44 ------------------------ tests/routines/test_statistics.mojo | 27 +++++++++++++++ 4 files changed, 46 insertions(+), 59 deletions(-) create mode 100644 tests/routines/test_statistics.mojo diff --git a/mojoproject.toml b/mojoproject.toml index 6c6b7553..27c82c4d 100644 --- a/mojoproject.toml +++ b/mojoproject.toml @@ -30,8 +30,9 @@ t = "clear && magic run test" # run individual tests to avoid overheat test_core = "magic run mojo test tests/core -I ./ -I ./tests/" test_creation = "magic run mojo test tests/routines/test_creation.mojo -I ./ -I ./tests/" -test_manipulation = "magic run mojo test tests/routines/test_manipulation.mojo -I ./ -I ./tests/" test_linalg = "magic run mojo test tests/routines/test_linalg.mojo -I ./ -I ./tests/" +test_manipulation = "magic run mojo test tests/routines/test_manipulation.mojo -I ./ -I ./tests/" +test_statistics = "magic run mojo test tests/routines/test_statistics.mojo -I ./ -I ./tests/" # run all final checks before a commit final = "magic run test && magic run format && magic run package" diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index e332b287..7e4c352a 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -14,9 +14,9 @@ from numojo.core.matrix import Matrix from numojo.core.utility import bool_to_numeric from numojo.routines.logic.comparison import greater, less from numojo.routines.math.arithmetic import add -from numojo.routines.sorting import binary_sort from numojo.routines.math.sums import sum, cumsum import numojo.routines.math.misc as misc +from numojo.routines.sorting import sort fn mean[ @@ -118,9 +118,7 @@ fn mean[ raise Error(String("The axis can either be 1 or 0!")) -fn mode[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: +fn mode[dtype: DType](array: NDArray[dtype]) raises -> Scalar[dtype]: """Mode of all items of an array. Parameters: @@ -132,7 +130,8 @@ fn mode[ Returns: The mode of all of the member values of array as a SIMD Value of `dtype`. """ - var sorted_array: NDArray[dtype] = binary_sort[dtype](array) + + var sorted_array: NDArray[dtype] = sort(array) var max_count = 0 var mode_value = sorted_array.item(0) var current_count = 1 @@ -152,14 +151,15 @@ fn mode[ return mode_value -# * IMPLEMENT median high and low fn median[ - dtype: DType = DType.float64 -](array: NDArray[dtype]) raises -> SIMD[dtype, 1]: - """Median value of all items of an array. + dtype: DType, //, returned_dtype: DType = DType.float64 +](array: NDArray[dtype]) raises -> Scalar[returned_dtype]: + """ + Median value of all items of an array. Parameters: dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. Args: array: An NDArray. @@ -167,12 +167,15 @@ fn median[ Returns: The median of all of the member values of array as a SIMD Value of `dtype`. """ - var sorted_array = binary_sort[dtype](array) - var n = array.num_elements() - if n % 2 == 1: - return sorted_array.item(n // 2) + var sorted_array = sort(array) + + if array.size % 2 == 1: + return sorted_array.item(array.size // 2).cast[returned_dtype]() else: - return (sorted_array.item(n // 2 - 1) + sorted_array.item(n // 2)) / 2 + return ( + sorted_array.item(array.size // 2 - 1) + + sorted_array.item(array.size // 2) + ).cast[returned_dtype]() / 2 fn std[ diff --git a/tests/core/test_matrix.mojo b/tests/core/test_matrix.mojo index e866cbf4..342bbd24 100644 --- a/tests/core/test_matrix.mojo +++ b/tests/core/test_matrix.mojo @@ -316,50 +316,6 @@ def test_hyperbolic(): check_matrices_close(nm.atanh(B), np.arctanh(Bnp), "atanh is broken") -# ===-----------------------------------------------------------------------===# -# Statistics -# ===-----------------------------------------------------------------------===# - - -def test_statistics(): - var np = Python.import_module("numpy") - var A = Matrix.rand[f64]((100, 100)) - var Anp = np.matrix(A.to_numpy()) - - assert_true( - np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), - "`mean` is broken", - ) - for i in range(2): - check_matrices_close( - nm.mean(A, i), - np.mean(Anp, i), - String("`mean` is broken for {i}-dimension"), - ) - - assert_true( - np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)), - "`variance` is broken", - ) - for i in range(2): - check_matrices_close( - nm.variance(A, i), - np.`var`(Anp, i), - String("`variance` is broken for {i}-dimension"), - ) - - assert_true( - np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)), - "`std` is broken", - ) - for i in range(2): - check_matrices_close( - nm.std(A, i), - np.std(Anp, i), - String("`std` is broken for {i}-dimension"), - ) - - def test_sorting(): var np = Python.import_module("numpy") var A = Matrix.rand[f64]((10, 10)) diff --git a/tests/routines/test_statistics.mojo b/tests/routines/test_statistics.mojo new file mode 100644 index 00000000..bcf42158 --- /dev/null +++ b/tests/routines/test_statistics.mojo @@ -0,0 +1,27 @@ +import numojo as nm +from numojo.prelude import * +from numojo.core.matrix import Matrix +from python import Python, PythonObject +from testing.testing import assert_raises, assert_true +from utils_for_test import check, check_is_close + +# ===-----------------------------------------------------------------------===# +# Statistics +# ===-----------------------------------------------------------------------===# + + +def test_mean(): + var np = Python.import_module("numpy") + var A = nm.NDArray[f64](Shape(10, 10)) + var Anp = np.matrix(A.to_numpy()) + + assert_true( + np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), + "`mean` is broken", + ) + for i in range(2): + check_is_close( + nm.mean(A, i), + np.mean(Anp, i), + String("`mean` is broken for {i}-dimension"), + ) From bb648478b2c2f04191d5aa8481b2b8adebc78d2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 18:01:02 +0100 Subject: [PATCH 06/13] Raises for `sum` over axis for 1darray --- numojo/core/ndarray.mojo | 7 ++ numojo/routines/math/sums.mojo | 32 +++++--- numojo/routines/statistics/averages.mojo | 96 +++++++++++++++++++++++- 3 files changed, 123 insertions(+), 12 deletions(-) diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 0f091749..5e5c294e 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -1854,6 +1854,13 @@ struct NDArray[dtype: DType = DType.float64]( fn __pow__(self, p: Int) -> Self: return self._elementwise_pow(p) + fn __pow__(self, rhs: Scalar[dtype]) raises -> Self: + """Power of items.""" + var res = self + for i in range(self.size): + res._buf.ptr[i] = self._buf.ptr[i].__pow__(rhs) + return res^ + fn __pow__(self, p: Self) raises -> Self: if self.size != p.size: raise Error( diff --git a/numojo/routines/math/sums.mojo b/numojo/routines/math/sums.mojo index 6d8eddda..526635d3 100644 --- a/numojo/routines/math/sums.mojo +++ b/numojo/routines/math/sums.mojo @@ -39,9 +39,7 @@ fn sum[dtype: DType](A: NDArray[dtype]) -> Scalar[dtype]: return res -fn sum[ - dtype: DType -](A: NDArray[dtype], owned axis: Int) raises -> NDArray[dtype]: +fn sum[dtype: DType](A: NDArray[dtype], axis: Int) raises -> NDArray[dtype]: """ Returns sums of array elements over a given axis. @@ -52,6 +50,10 @@ fn sum[ print(nm.sum(A, axis=0)) ``` + Raises: + Error: If the axis is out of bound. + Error: If the number of dimensions is 1. + Args: A: NDArray. axis: The axis along which the sum is performed. @@ -60,25 +62,35 @@ fn sum[ An NDArray. """ - if axis < 0: - axis += A.ndim - if (axis < 0) or (axis >= A.ndim): + var normalized_axis = axis + if normalized_axis < 0: + normalized_axis += A.ndim + + if (normalized_axis < 0) or (normalized_axis >= A.ndim): raise Error( - String("Invalid index: index out of bound [0, {}).").format(A.ndim) + String("Axis {} out of bound [0, {}).").format(axis, A.ndim) + ) + if A.ndim == 1: + raise Error( + String( + "`numojo.routines.math.sums.sum()`: " + "Cannot sum over axis for 1-d array. " + "Please remove the `axis` argument." + ) ) var result_shape: List[Int] = List[Int]() - var size_of_axis: Int = A.shape[axis] + var size_of_axis: Int = A.shape[normalized_axis] var slices: List[Slice] = List[Slice]() for i in range(A.ndim): - if i != axis: + if i != normalized_axis: result_shape.append(A.shape[i]) slices.append(Slice(0, A.shape[i])) else: slices.append(Slice(0, 0)) # Temp value var result = zeros[dtype](NDArrayShape(result_shape)) for i in range(size_of_axis): - slices[axis] = Slice(i, i + 1) + slices[normalized_axis] = Slice(i, i + 1) var arr_slice = A[slices] result += arr_slice diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 7e4c352a..98a52ba0 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -60,9 +60,9 @@ fn mean[ var normalized_axis = axis if axis < 0: - normalized_axis = axis + a.ndim + normalized_axis += a.ndim - if normalized_axis < 0 or normalized_axis >= a.ndim: + if (normalized_axis < 0) or (normalized_axis >= a.ndim): raise Error(String("Axis {} out of bounds!").format(axis)) return ( @@ -178,6 +178,40 @@ fn median[ ).cast[returned_dtype]() / 2 +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Compute the standard deviation. + + Args: + A: An array. + ddof: Delta degree of freedom. + """ + + if ddof >= A.size: + raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + + return variance[returned_dtype](A, ddof=ddof) ** 0.5 + + +fn std[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ + returned_dtype +]: + """ + Compute the standard deviation along axis. + + Args: + A: An Array. + axis: 0 or 1. + ddof: Delta degree of freedom. + """ + + return variance[returned_dtype](A, axis, ddof=ddof) ** 0.5 + + fn std[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: @@ -210,6 +244,64 @@ fn std[ return variance[returned_dtype](A, axis, ddof=ddof) ** 0.5 +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Compute the variance. + + Args: + A: An array. + ddof: Delta degree of freedom. + """ + + if ddof >= A.size: + raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A)) + ) / (A.size - ddof) + + +fn variance[ + dtype: DType, //, returned_dtype: DType = DType.float64 +](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ + returned_dtype +]: + """ + Compute the variance along axis. + + Args: + A: An array. + axis: 0 or 1. + ddof: Delta degree of freedom. + """ + + if (ddof >= A.shape[0]) or (ddof >= A.shape[1]): + raise Error( + String( + "ddof {ddof} should be smaller than size" + " {A.shape[0]}x{A.shape[1]}" + ) + ) + + if axis == 0: + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)), + axis=0, + ) / (A.shape[0] - ddof) + elif axis == 1: + return sum( + (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)) + * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)), + axis=1, + ) / (A.shape[1] - ddof) + else: + raise Error(String("The axis can either be 1 or 0!")) + + fn variance[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: From 8b4e3e2212392f5e219d27f0183bcf076dd0ffa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 21:19:27 +0100 Subject: [PATCH 07/13] Remove variance and std by axis --- numojo/routines/statistics/averages.mojo | 55 ------------------------ tests/routines/test_statistics.mojo | 23 ++++++++-- 2 files changed, 19 insertions(+), 59 deletions(-) diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index 98a52ba0..cb68d59f 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -195,23 +195,6 @@ fn std[ return variance[returned_dtype](A, ddof=ddof) ** 0.5 -fn std[ - dtype: DType, //, returned_dtype: DType = DType.float64 -](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ - returned_dtype -]: - """ - Compute the standard deviation along axis. - - Args: - A: An Array. - axis: 0 or 1. - ddof: Delta degree of freedom. - """ - - return variance[returned_dtype](A, axis, ddof=ddof) ** 0.5 - - fn std[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: @@ -264,44 +247,6 @@ fn variance[ ) / (A.size - ddof) -fn variance[ - dtype: DType, //, returned_dtype: DType = DType.float64 -](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[ - returned_dtype -]: - """ - Compute the variance along axis. - - Args: - A: An array. - axis: 0 or 1. - ddof: Delta degree of freedom. - """ - - if (ddof >= A.shape[0]) or (ddof >= A.shape[1]): - raise Error( - String( - "ddof {ddof} should be smaller than size" - " {A.shape[0]}x{A.shape[1]}" - ) - ) - - if axis == 0: - return sum( - (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)) - * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=0)), - axis=0, - ) / (A.shape[0] - ddof) - elif axis == 1: - return sum( - (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)) - * (A.astype[returned_dtype]() - mean[returned_dtype](A, axis=1)), - axis=1, - ) / (A.shape[1] - ddof) - else: - raise Error(String("The axis can either be 1 or 0!")) - - fn variance[ dtype: DType, //, returned_dtype: DType = DType.float64 ](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]: diff --git a/tests/routines/test_statistics.mojo b/tests/routines/test_statistics.mojo index bcf42158..abf39f39 100644 --- a/tests/routines/test_statistics.mojo +++ b/tests/routines/test_statistics.mojo @@ -10,10 +10,10 @@ from utils_for_test import check, check_is_close # ===-----------------------------------------------------------------------===# -def test_mean(): +def test_mean_median_var_std(): var np = Python.import_module("numpy") - var A = nm.NDArray[f64](Shape(10, 10)) - var Anp = np.matrix(A.to_numpy()) + var A = nm.random.randn(10, 10) + var Anp = A.to_numpy() assert_true( np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)), @@ -23,5 +23,20 @@ def test_mean(): check_is_close( nm.mean(A, i), np.mean(Anp, i), - String("`mean` is broken for {i}-dimension"), + String("`mean` is broken for {}-dimension".format(i)), ) + + assert_true( + np.all(np.isclose(nm.median(A), np.median(Anp), atol=0.1)), + "`median` is broken", + ) + + assert_true( + np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)), + "`variance` is broken", + ) + + assert_true( + np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)), + "`std` is broken", + ) From c888ff35105af231283e566b1f6eeabdbfb257e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 21:21:26 +0100 Subject: [PATCH 08/13] Remove cumpvariance, cumvariance, cumpstdev, cumstdev --- numojo/routines/statistics/averages.mojo | 104 ----------------------- 1 file changed, 104 deletions(-) diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index cb68d59f..a9fc54f4 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -301,107 +301,3 @@ fn variance[ ) / (A.shape[1] - ddof) else: raise Error(String("The axis can either be 1 or 0!")) - - -fn cumpvariance[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: - """ - Population variance of a array. - - Parameters: - dtype: The element type.. - - Args: - array: A NDArray. - mu: The mean of the array, if provided. - Returns: - The variance of all of the member values of array as a SIMD Value of `dtype`. - """ - var mean_value: Scalar[dtype] - if not mu: - mean_value = mean[dtype](array) - else: - mean_value = mu.value() - - var result = Scalar[dtype]() - - for i in range(array.num_elements()): - result += (array.load(i) - mean_value) ** 2 - - return mt.sqrt(result / (array.num_elements())) - - -fn cumvariance[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: - """ - Variance of a array. - - Parameters: - dtype: The element type. - - Args: - array: A NDArray. - mu: The mean of the array, if provided. - - Returns: - The variance of all of the member values of array as a SIMD Value of `dtype`. - """ - var mean_value: Scalar[dtype] - - if not mu: - mean_value = mean[dtype](array) - else: - mean_value = mu.value() - - var result = Scalar[dtype]() - for i in range(array.num_elements()): - result += (array.load(i) - mean_value) ** 2 - - return mt.sqrt(result / (array.num_elements() - 1)) - - -fn cumpstdev[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: - """ - Population standard deviation of a array. - - Parameters: - dtype: The element type. - - Args: - array: A NDArray. - mu: The mean of the array, if provided. - - Returns: - The standard deviation of all of the member values of array as a SIMD Value of `dtype`. - """ - return mt.sqrt(cumpvariance[dtype](array, mu)) - - -fn cumstdev[ - dtype: DType = DType.float64 -](array: NDArray[dtype], mu: Optional[Scalar[dtype]] = None) raises -> SIMD[ - dtype, 1 -]: - """ - Standard deviation of a array. - - Parameters: - dtype: The element type. - - Args: - array: A NDArray. - mu: The mean of the array, if provided. - Returns: - The standard deviation of all of the member values of array as a SIMD Value of `dtype`. - """ - return mt.sqrt(cumvariance[dtype](array, mu)) From d660e0263720aa8302fa960530577ae9aa829413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 22:40:27 +0100 Subject: [PATCH 09/13] Update test and init --- mojoproject.toml | 1 + numojo/__init__.mojo | 4 ---- numojo/routines/statistics/__init__.mojo | 4 ---- tests/routines/test_random.mojo | 24 ++++++++++++------------ 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/mojoproject.toml b/mojoproject.toml index 27c82c4d..03e98ec6 100644 --- a/mojoproject.toml +++ b/mojoproject.toml @@ -32,6 +32,7 @@ test_core = "magic run mojo test tests/core -I ./ -I ./tests/" test_creation = "magic run mojo test tests/routines/test_creation.mojo -I ./ -I ./tests/" test_linalg = "magic run mojo test tests/routines/test_linalg.mojo -I ./ -I ./tests/" test_manipulation = "magic run mojo test tests/routines/test_manipulation.mojo -I ./ -I ./tests/" +test_random = "magic run mojo test tests/routines/test_random.mojo -I ./ -I ./tests/" test_statistics = "magic run mojo test tests/routines/test_statistics.mojo -I ./ -I ./tests/" # run all final checks before a commit diff --git a/numojo/__init__.mojo b/numojo/__init__.mojo index a4ee84d6..de10f1a8 100644 --- a/numojo/__init__.mojo +++ b/numojo/__init__.mojo @@ -151,10 +151,6 @@ from numojo.routines.statistics import ( median, variance, std, - cumpvariance, - cumvariance, - cumpstdev, - cumstdev, ) from numojo.routines import bitwise diff --git a/numojo/routines/statistics/__init__.mojo b/numojo/routines/statistics/__init__.mojo index df6d0007..253b83dd 100644 --- a/numojo/routines/statistics/__init__.mojo +++ b/numojo/routines/statistics/__init__.mojo @@ -6,8 +6,4 @@ from .averages import ( median, variance, std, - cumpvariance, - cumvariance, - cumpstdev, - cumstdev, ) diff --git a/tests/routines/test_random.mojo b/tests/routines/test_random.mojo index 963714a0..bc167141 100644 --- a/tests/routines/test_random.mojo +++ b/tests/routines/test_random.mojo @@ -41,15 +41,15 @@ def test_randn(): 20, 20, 20, mean=3.0, variance=1.0 ) var arr_variadic_12 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=1.0, variance=3.0 + 20, 20, 20, mean=1.0, variance=2.0 ) var arr_variadic_mean01 = nm.mean(arr_variadic_01) var arr_variadic_mean31 = nm.mean(arr_variadic_31) var arr_variadic_mean12 = nm.mean(arr_variadic_12) - var arr_variadic_var01 = nm.cumvariance(arr_variadic_01) - var arr_variadic_var31 = nm.cumvariance(arr_variadic_31) - var arr_variadic_var12 = nm.cumvariance(arr_variadic_12) + var arr_variadic_var01 = nm.variance(arr_variadic_01) + var arr_variadic_var31 = nm.variance(arr_variadic_31) + var arr_variadic_var12 = nm.variance(arr_variadic_12) assert_almost_equal( arr_variadic_mean01, @@ -84,7 +84,7 @@ def test_randn(): ) assert_almost_equal( arr_variadic_var12, - 3, + 2, msg="Variance of random array with mean 1 and variance 2", atol=0.1, ) @@ -105,9 +105,9 @@ def test_randn_list(): var arr_list_mean01 = nm.mean(arr_list_01) var arr_list_mean31 = nm.mean(arr_list_31) var arr_list_mean12 = nm.mean(arr_list_12) - var arr_list_var01 = nm.cumvariance(arr_list_01) - var arr_list_var31 = nm.cumvariance(arr_list_31) - var arr_list_var12 = nm.cumvariance(arr_list_12) + var arr_list_var01 = nm.variance(arr_list_01) + var arr_list_var31 = nm.variance(arr_list_31) + var arr_list_var12 = nm.variance(arr_list_12) assert_almost_equal( arr_list_mean01, @@ -173,18 +173,18 @@ def test_rand_exponential(): ) # For exponential distribution, variance = 1 / (rate^2) - var arr_variadic_var = nm.cumvariance(arr_variadic) - var arr_list_var = nm.cumvariance(arr_list) + var arr_variadic_var = nm.variance(arr_variadic) + var arr_list_var = nm.variance(arr_list) assert_almost_equal( arr_variadic_var, - 1 / (2), + 1 / 2, msg="Variance of exponential distribution with rate 2.0", atol=0.1, ) assert_almost_equal( arr_list_var, - 1 / (0.5), + 1 / 0.5, msg="Variance of exponential distribution with rate 0.5", atol=0.5, ) From 22914cd69dcfedd788dbc9ab26a17a040b5cf396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 23:07:43 +0100 Subject: [PATCH 10/13] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 539af950b31a25346d96c32764615b7a3c0cdb67 Author: ZHU Yuhao 朱宇浩 Date: Tue Jan 28 22:14:28 2025 +0100 [routine][random] Update the random module + unify behaviors + add `randint` (#199) The `random` module was created at quite an early stage. A lot of things should be re-considered. This PR aims to refactor the random module by: 1. Aligning the functional behaviors as much as possible with `numpy` while keeping internal consistency of style: The `shape` always comes as the first argument. 2. For all functions, accept `shape` as an `NDArrayShape`. Meanwhile, we still provide overloads for `*shape: Int`. (`shape: List[Int]` is also possible but not recommened). 3. `rand` now generates uniformed distributed values and does not accept integral types. 4. `randint` is added to generate random integral values based on `low` and `high`. add tests for it. 5. `random_exponential` is renamed as `exponential` (same as numpy)[https://numpy.org/doc/stable/reference/random/generated/numpy.random.exponential.html#numpy.random.exponential]. --- numojo/routines/creation.mojo | 6 +- numojo/routines/random.mojo | 444 ++++++++++++++++++++++---------- tests/routines/test_random.mojo | 29 ++- 3 files changed, 334 insertions(+), 145 deletions(-) diff --git a/numojo/routines/creation.mojo b/numojo/routines/creation.mojo index ac9a4a89..9ac9a06d 100644 --- a/numojo/routines/creation.mojo +++ b/numojo/routines/creation.mojo @@ -2220,10 +2220,10 @@ fn array[ continue shape.append(int(data.shape[i])) A = NDArray[dtype](NDArrayShape(shape), order=order) - A._buf = OwnData[dtype](A.size) - # memset_zero(A._buf, A.size) for i in range(A.size): - A._buf.ptr[i] = float(data.item(PythonObject(i))).cast[dtype]() + (A._buf.ptr + i).init_pointee_copy( + float(data.item(PythonObject(i))).cast[dtype]() + ) return A diff --git a/numojo/routines/random.mojo b/numojo/routines/random.mojo index 72cb1dcb..366de311 100644 --- a/numojo/routines/random.mojo +++ b/numojo/routines/random.mojo @@ -1,28 +1,43 @@ -""" -Random values array generation. -""" # ===----------------------------------------------------------------------=== # -# Implements RANDOM -# Last updated: 2024-09-06 +# Distributed under the Apache 2.0 License with LLVM Exceptions. +# See LICENSE and the LLVM License for more information. +# https://github.com/Mojo-Numerics-and-Algorithms-group/NuMojo/blob/main/LICENSE +# https://llvm.org/LICENSE.txt # ===----------------------------------------------------------------------=== # +""" +numojo.routines.random +---------------------- + +Creates array of the given shape and populate it with random samples from +a certain distribution. + +This module is similar to `numpy.random`. However, in this module, the shape is +always appearing as the first argument. +""" + import math as mt from random import random as builtin_random -from builtin.tuple import Tuple from numojo.core.ndarray import NDArray -from numojo.core.utility import is_inttype, is_floattype + +# ===----------------------------------------------------------------------=== # +# Uniform distribution +# ===----------------------------------------------------------------------=== # -fn rand[dtype: DType = DType.float64](*shape: Int) raises -> NDArray[dtype]: +fn rand[ + dtype: DType = DType.float64 +](shape: NDArrayShape) raises -> NDArray[dtype]: """ - Generate a random NDArray of the given shape and dtype. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [0, 1). Example: - ```py - var arr = numojo.core.random.rand[numojo.i16](3,2,4) - print(arr) - ``` + ```mojo + var arr = numojo.core.random.rand[numojo.i16](Shape(3,2,4)) + print(arr) + ``` Parameters: dtype: The data type of the NDArray elements. @@ -34,10 +49,12 @@ fn rand[dtype: DType = DType.float64](*shape: Int) raises -> NDArray[dtype]: The generated NDArray of type `dtype` filled with random values. """ - if dtype.is_integral(): + builtin_random.seed() + + @parameter + if not dtype.is_floating_point(): raise Error( - "Integral values cannot be sampled between 0 and 1. Use" - " `rand(*shape, min, max)` instead." + "Invalid type provided. dtype must be a floating-point type." ) var result: NDArray[dtype] = NDArray[dtype](shape) @@ -47,67 +64,59 @@ fn rand[dtype: DType = DType.float64](*shape: Int) raises -> NDArray[dtype]: dtype ]() (result._buf.ptr + i).init_pointee_copy(temp) + return result^ -@parameter -fn _int_rand_func[ - dtype: DType -](mut result: NDArray[dtype], min: Scalar[dtype], max: Scalar[dtype]): +fn rand[dtype: DType = DType.float64](*shape: Int) raises -> NDArray[dtype]: """ - Generate random integers between `min` and `max` and store them in the given NDArray. - - Parameters: - dtype: The data type of the random integers. - - Args: - result: The NDArray to store the random integers. - min: The minimum value of the random integers. - max: The maximum value of the random integers. + Overloads the function `rand(shape: NDArrayShape)`. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [0, 1). """ - builtin_random.randint[dtype]( - ptr=result._buf.ptr, - size=result.size, - low=int(min), - high=int(max), - ) + return rand[dtype](NDArrayShape(shape)) -@parameter -fn _float_rand_func[ - dtype: DType -](mut result: NDArray[dtype], min: Scalar[dtype], max: Scalar[dtype]): +fn rand[ + dtype: DType = DType.float64 +](shape: List[Int]) raises -> NDArray[dtype]: """ - Generate random floating-point numbers between `min` and `max` and store them in the given NDArray. + Overloads the function `rand(shape: NDArrayShape)`. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [0, 1). + """ + return rand[dtype](NDArrayShape(shape)) - Parameters: - dtype: The data type of the random floating-point numbers. - Args: - result: The NDArray to store the random floating-point numbers. - min: The minimum value of the random floating-point numbers. - max: The maximum value of the random floating-point numbers. +fn rand[ + dtype: DType = DType.float64 +](shape: VariadicList[Int]) raises -> NDArray[dtype]: """ - for i in range(result.size): - var temp: Scalar[dtype] = builtin_random.random_float64( - min.cast[f64](), max.cast[f64]() - ).cast[dtype]() - (result._buf.ptr + i).init_pointee_copy(temp) + Overloads the function `rand(shape: NDArrayShape)` + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [0, 1). + """ + return rand[dtype](NDArrayShape(shape)) fn rand[ dtype: DType = DType.float64 -](*shape: Int, min: Scalar[dtype], max: Scalar[dtype]) raises -> NDArray[dtype]: +]( + shape: NDArrayShape, min: Scalar[dtype], max: Scalar[dtype] +) raises -> NDArray[dtype]: """ - Generate a random NDArray of the given shape and dtype with values between `min` and `max`. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [min, max). This is equivalent to + `min + rand() * (max - min)`. Example: - ```py - var arr = numojo.core.random.rand[numojo.i16](3,2,4, min=0, max=100) - print(arr) - ``` + ```mojo + var arr = numojo.core.random.rand[numojo.i16](Shape(3,2,4), min=0, max=100) + print(arr) + ``` + Raises: - Error: If the dtype is not an integral or floating-point type. + Error: If the dtype is not a floating-point type. Parameters: dtype: The data type of the NDArray elements. @@ -118,119 +127,188 @@ fn rand[ max: The maximum value of the random values. Returns: - The generated NDArray of type `dtype` filled with random values between `min` and `max`. + The generated NDArray of type `dtype` filled with random values + between `min` and `max`. """ - var result: NDArray[dtype] = NDArray[dtype](shape) - builtin_random.seed() @parameter - if is_floattype[dtype](): - _float_rand_func[dtype](result, min, max) - elif is_inttype[dtype](): - _int_rand_func[dtype](result, min, max) - else: + if not dtype.is_floating_point(): raise Error( - "Invalid type provided. dtype must be either an integral or" - " floating-point type." + "Invalid type provided. dtype must be a floating-point type." + ) + + var result: NDArray[dtype] = NDArray[dtype](shape) + + for i in range(result.size): + (result._buf.ptr + i).init_pointee_copy( + builtin_random.random_float64( + min.cast[DType.float64](), max.cast[DType.float64]() + ).cast[dtype]() ) return result^ +fn rand[ + dtype: DType = DType.float64 +](*shape: Int, min: Scalar[dtype], max: Scalar[dtype]) raises -> NDArray[dtype]: + """ + Overloads the function `rand(shape: NDArrayShape, min, max)`. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [min, max). This is equivalent to + `min + rand() * (max - min)`. + """ + return rand[dtype](NDArrayShape(shape), min=min, max=max) + + fn rand[ dtype: DType = DType.float64 ](shape: List[Int], min: Scalar[dtype], max: Scalar[dtype]) raises -> NDArray[ dtype ]: """ - Generate a random NDArray of the given shape and dtype with values between `min` and `max`. + Overloads the function `rand(shape: NDArrayShape, min, max)`. + Creates an array of the given shape and populate it with random samples from + a uniform distribution over [min, max). This is equivalent to + `min + rand() * (max - min)`. + """ + return rand[dtype](NDArrayShape(shape), min=min, max=max) + + +# ===----------------------------------------------------------------------=== # +# Discrete integers +# ===----------------------------------------------------------------------=== # - Example: - ```py - var arr = numojo.core.random.rand[numojo.i16]((3,2,4), min=0, max=100) - print(arr) - ``` + +fn randint[ + dtype: DType = DType.int64 +](shape: NDArrayShape, low: Int, high: Int) raises -> NDArray[dtype]: + """ + Return an array of random integers from low (inclusive) to high (exclusive). + Note that it is different from the built-in `random.randint()` function + which returns integer in range low (inclusive) to high (inclusive). Raises: - Error: If the dtype is not an integral or floating-point type. + Error: If the dtype is not a integer type. + Error: If high is not greater than low. Parameters: dtype: The data type of the NDArray elements. Args: shape: The shape of the NDArray. - min: The minimum value of the random values. - max: The maximum value of the random values. + low: The minimum value of the random values. + high: The maximum value of the random values. Returns: - The generated NDArray of type `dtype` filled with random values between `min` and `max`. + An array of random integers from low (inclusive) to high (exclusive). """ + + @parameter + if not dtype.is_integral(): + raise Error("Only Integral values can be sampled using this function.") + + if high <= low: + raise Error("High must be greater than low.") + var result: NDArray[dtype] = NDArray[dtype](shape) - builtin_random.seed() + + builtin_random.randint[dtype]( + ptr=result._buf.ptr, size=result.size, low=low, high=high - 1 + ) + + return result^ + + +fn randint[ + dtype: DType = DType.int64 +](shape: NDArrayShape, high: Int) raises -> NDArray[dtype]: + """ + Return an array of random integers from 0 (inclusive) to high (exclusive). + + Raises: + Error: If the dtype is not a integer type. + Error: If high <= 0. + + Parameters: + dtype: The data type of the NDArray elements. + + Args: + shape: The shape of the NDArray. + high: The maximum value of the random values. + + Returns: + An array of random integers from [0, high). + """ @parameter - if is_floattype[dtype](): - _float_rand_func[dtype](result, min, max) - elif is_inttype[dtype](): - _int_rand_func[dtype](result, min, max) - else: - raise Error( - "Invalid type provided. dtype must be either an integral or" - " floating-point type." - ) + if not dtype.is_integral(): + raise Error("Only Integral values can be sampled using this function.") + + if high <= 0: + raise Error("High must be greater than 0.") + + var result: NDArray[dtype] = NDArray[dtype](shape) + + builtin_random.randint[dtype]( + ptr=result._buf.ptr, size=result.size, low=0, high=high - 1 + ) return result^ +# ===----------------------------------------------------------------------=== # +# Normal distribution +# ===----------------------------------------------------------------------=== # + + fn randn[ dtype: DType = DType.float64 -]( - *shape: Int, mean: Scalar[dtype] = 0, variance: Scalar[dtype] = 1 -) raises -> NDArray[dtype]: +](shape: NDArrayShape) raises -> NDArray[dtype]: """ - Generate a random NDArray of the given shape and dtype with values having a mean and variance. - - Example: - ```py - var arr = numojo.core.random.rand_meanvar[numojo.i16](3,2,4, mean=0, variance=1) - print(arr) - ``` + Creates an array of the given shape and populate it with random samples from + a standard normal distribution. Parameters: dtype: The data type of the NDArray elements. Args: shape: The shape of the NDArray. - mean: The mean value of the random values. - variance: The variance of the random values. Returns: - The generated NDArray of type `dtype` filled with random values having a mean and variance. + An array of the given shape and populate it with random samples from + a standard normal distribution. """ + builtin_random.seed() + var result: NDArray[dtype] = NDArray[dtype](shape) + builtin_random.randn[dtype]( ptr=result._buf.ptr, size=result.size, - mean=mean.cast[DType.float64](), - variance=variance.cast[DType.float64](), ) + return result^ +fn randn[dtype: DType = DType.float64](*shape: Int) raises -> NDArray[dtype]: + """ + Overloads the function `randn(shape: NDArrayShape)`. + Creates an array of the given shape and populate it with random samples from + a standard normal distribution. + """ + return randn[dtype](NDArrayShape(shape)) + + fn randn[ dtype: DType = DType.float64 ]( - shape: List[Int], mean: Scalar[dtype] = 0, variance: Scalar[dtype] = 1 + shape: NDArrayShape, mean: Scalar[dtype], variance: Scalar[dtype] ) raises -> NDArray[dtype]: """ - Generate a random NDArray of the given shape and dtype with values having a mean and variance. - - Example: - ```py - var arr = numojo.core.random.rand_meanvar[numojo.i16](List[Int](3,2,4), mean=0, variance=1) - print(arr) - ``` + Creates an array of the given shape and populate it with random samples from + a normal distribution with given mean and variance. Parameters: dtype: The data type of the NDArray elements. @@ -241,28 +319,65 @@ fn randn[ variance: The variance of the random values. Returns: - The generated NDArray of type `dtype` filled with random values having a mean and variance. + An array of the given shape and populate it with random samples from + a normal distribution with given mean and variance. """ + builtin_random.seed() + var result: NDArray[dtype] = NDArray[dtype](shape) + builtin_random.randn[dtype]( ptr=result._buf.ptr, size=result.size, mean=mean.cast[DType.float64](), variance=variance.cast[DType.float64](), ) + return result^ -fn rand_exponential[ +fn randn[ + dtype: DType = DType.float64 +](*shape: Int, mean: Scalar[dtype], variance: Scalar[dtype]) raises -> NDArray[ + dtype +]: + """ + Overloads the function `randn(shape: NDArrayShape, mean, variance)`. + Creates an array of the given shape and populate it with random samples from + a normal distribution with given mean and variance. + """ + return randn[dtype](NDArrayShape(shape), mean=mean, variance=variance) + + +fn randn[ dtype: DType = DType.float64 -](*shape: Int, rate: Scalar[dtype] = 1.0) raises -> NDArray[dtype]: +]( + shape: List[Int], mean: Scalar[dtype], variance: Scalar[dtype] +) raises -> NDArray[dtype]: """ - Generate a random NDArray of the given shape and dtype with values from an exponential distribution. + Overloads the function `randn(shape: NDArrayShape, mean, variance)`. + Creates an array of the given shape and populate it with random samples from + a normal distribution with given mean and variance. + """ + return randn[dtype](NDArrayShape(shape), mean=mean, variance=variance) + + +# ===----------------------------------------------------------------------=== # +# Exponential distribution +# ===----------------------------------------------------------------------=== # + + +fn exponential[ + dtype: DType = DType.float64 +](shape: NDArrayShape, scale: Scalar[dtype] = 1.0) raises -> NDArray[dtype]: + """ + Creates an array of the given shape and populate it with random samples from + an exponential distribution with given scale parameter. Example: ```py - var arr = numojo.core.random.rand_exponential[numojo.f64](3, 2, 4, rate=2.0) + var arr = numojo.random.exponential(Shape(3, 2, 4), 2.0) print(arr) ``` @@ -271,48 +386,99 @@ fn rand_exponential[ Args: shape: The shape of the NDArray. - rate: The rate parameter of the exponential distribution (lambda). + scale: The scale parameter of the exponential distribution (lambda). Returns: - The generated NDArray of type `dtype` filled with random values from an exponential distribution. + An array of the given shape and populate it with random samples from + an exponential distribution with given scale parameter. """ + + @parameter + if not dtype.is_floating_point(): + raise Error( + "Invalid type provided. dtype must be a floating-point type." + ) + builtin_random.seed() var result = NDArray[dtype](NDArrayShape(shape)) - for i in range(result.num_elements()): + for i in range(result.size): var u = builtin_random.random_float64().cast[dtype]() - (result._buf.ptr + i).init_pointee_copy(-mt.log(1 - u) / rate) + (result._buf.ptr + i).init_pointee_copy(-mt.log(u) / scale) return result^ -fn rand_exponential[ +fn exponential[ dtype: DType = DType.float64 -](shape: List[Int], rate: Scalar[dtype] = 1.0) raises -> NDArray[dtype]: +](*shape: Int, scale: Scalar[dtype] = 1.0) raises -> NDArray[dtype]: + """ + Overloads the function `exponential(shape: NDArrayShape, rate)`. + Creates an array of the given shape and populate it with random samples from + an exponential distribution with given scale parameter. """ - Generate a random NDArray of the given shape and dtype with values from an exponential distribution. - Example: - ```py - var arr = numojo.core.random.rand_exponential[numojo.f64](List[Int](3, 2, 4), rate=2.0) - print(arr) - ``` + return exponential[dtype](NDArrayShape(shape), scale=scale) + + +fn exponential[ + dtype: DType = DType.float64 +](shape: List[Int], scale: Scalar[dtype] = 1.0) raises -> NDArray[dtype]: + """ + Overloads the function `exponential(shape: NDArrayShape, rate)`. + Creates an array of the given shape and populate it with random samples from + an exponential distribution with given scale parameter. + """ + + return exponential[dtype](NDArrayShape(shape), scale=scale) + + +# ===----------------------------------------------------------------------=== # +# To be deprecated +# ===----------------------------------------------------------------------=== # + + +@parameter +fn _int_rand_func[ + dtype: DType +](mut result: NDArray[dtype], min: Scalar[dtype], max: Scalar[dtype]): + """ + Generate random integers between `min` and `max` and store them in the given NDArray. Parameters: - dtype: The data type of the NDArray elements. + dtype: The data type of the random integers. Args: - shape: The shape of the NDArray as a List[Int]. - rate: The rate parameter of the exponential distribution (lambda). + result: The NDArray to store the random integers. + min: The minimum value of the random integers. + max: The maximum value of the random integers. + """ + builtin_random.randint[dtype]( + ptr=result._buf.ptr, + size=result.size, + low=int(min), + high=int(max), + ) - Returns: - The generated NDArray of type `dtype` filled with random values from an exponential distribution. + +@parameter +fn _float_rand_func[ + dtype: DType +](mut result: NDArray[dtype], min: Scalar[dtype], max: Scalar[dtype]): """ - builtin_random.seed() - var result = NDArray[dtype](shape) + Generate random floating-point numbers between `min` and `max` and + store them in the given NDArray. - for i in range(result.num_elements()): - var u = builtin_random.random_float64().cast[dtype]() - (result._buf.ptr + i).init_pointee_copy(-mt.log(1 - u) / rate) + Parameters: + dtype: The data type of the random floating-point numbers. - return result^ + Args: + result: The NDArray to store the random floating-point numbers. + min: The minimum value of the random floating-point numbers. + max: The maximum value of the random floating-point numbers. + """ + for i in range(result.size): + var temp: Scalar[dtype] = builtin_random.random_float64( + min.cast[f64](), max.cast[f64]() + ).cast[dtype]() + (result._buf.ptr + i).init_pointee_copy(temp) diff --git a/tests/routines/test_random.mojo b/tests/routines/test_random.mojo index bc167141..365700c9 100644 --- a/tests/routines/test_random.mojo +++ b/tests/routines/test_random.mojo @@ -1,4 +1,5 @@ import numojo as nm +from numojo.prelude import * from python import Python, PythonObject from utils_for_test import check, check_is_close from testing.testing import assert_true, assert_almost_equal @@ -32,6 +33,26 @@ def test_randminmax(): ) +def test_randint(): + """Test random int array generation with min and max values.""" + var arr_low_high = nm.random.randint(Shape(10, 10, 10), 0, 10) + var arr_high = nm.random.randint(Shape(10, 10, 10), 6) + var arr_low_high_mean = nm.mean(arr_low_high) + var arr_high_mean = nm.mean(arr_high) + assert_almost_equal( + arr_low_high_mean, + 4.5, + msg="Mean of `nm.random.randint(Shape(10, 10), 0, 10)` breaks", + atol=0.1, + ) + assert_almost_equal( + arr_high_mean, + 2.5, + msg="Mean of `nm.random.randint(Shape(10, 10), 6)` breaks", + atol=0.1, + ) + + def test_randn(): """Test random array generation with normal distribution.""" var arr_variadic_01 = nm.random.randn[nm.f64]( @@ -150,9 +171,11 @@ def test_randn_list(): def test_rand_exponential(): """Test random array generation with exponential distribution.""" - var arr_variadic = nm.random.rand_exponential[nm.f64](20, 20, 20, rate=2.0) - var arr_list = nm.random.rand_exponential[nm.f64]( - List[Int](20, 20, 20), rate=0.5 + var arr_variadic = nm.random.exponential[nm.f64]( + Shape(20, 20, 20), scale=2.0 + ) + var arr_list = nm.random.exponential[nm.f64]( + List[Int](20, 20, 20), scale=0.5 ) var arr_variadic_mean = nm.mean(arr_variadic) From b380bd0b29b14b412945ae3df350217fdf011623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 23:29:59 +0100 Subject: [PATCH 11/13] Fix `randn` problem --- numojo/routines/random.mojo | 13 +------------ tests/routines/test_random.mojo | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/numojo/routines/random.mojo b/numojo/routines/random.mojo index 366de311..97194094 100644 --- a/numojo/routines/random.mojo +++ b/numojo/routines/random.mojo @@ -323,18 +323,7 @@ fn randn[ a normal distribution with given mean and variance. """ - builtin_random.seed() - - var result: NDArray[dtype] = NDArray[dtype](shape) - - builtin_random.randn[dtype]( - ptr=result._buf.ptr, - size=result.size, - mean=mean.cast[DType.float64](), - variance=variance.cast[DType.float64](), - ) - - return result^ + return randn[dtype](shape) * mt.sqrt(variance) + mean fn randn[ diff --git a/tests/routines/test_random.mojo b/tests/routines/test_random.mojo index 365700c9..9c741000 100644 --- a/tests/routines/test_random.mojo +++ b/tests/routines/test_random.mojo @@ -1,3 +1,4 @@ +from math import sqrt import numojo as nm from numojo.prelude import * from python import Python, PythonObject @@ -55,15 +56,11 @@ def test_randint(): def test_randn(): """Test random array generation with normal distribution.""" - var arr_variadic_01 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=0.0, variance=1.0 - ) + var arr_variadic_01 = nm.random.randn[nm.f64](20, 20, 20) var arr_variadic_31 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=3.0, variance=1.0 - ) - var arr_variadic_12 = nm.random.randn[nm.f64]( - 20, 20, 20, mean=1.0, variance=2.0 + Shape(20, 20, 20), mean=3, variance=1 ) + var arr_variadic_12 = nm.random.randn[nm.f64](Shape(20, 20, 20), 1, 2) var arr_variadic_mean01 = nm.mean(arr_variadic_01) var arr_variadic_mean31 = nm.mean(arr_variadic_31) @@ -113,15 +110,9 @@ def test_randn(): def test_randn_list(): """Test random array generation with normal distribution.""" - var arr_list_01 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=0.0, variance=1.0 - ) - var arr_list_31 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=3.0, variance=1.0 - ) - var arr_list_12 = nm.random.randn[nm.f64]( - List[Int](20, 20, 20), mean=1.0, variance=2.0 - ) + var arr_list_01 = nm.random.randn[nm.f64](Shape(20, 20, 20)) + var arr_list_31 = nm.random.randn[nm.f64](Shape(20, 20, 20)) + 3 + var arr_list_12 = nm.random.randn[nm.f64](Shape(20, 20, 20)) * sqrt(2.0) + 1 var arr_list_mean01 = nm.mean(arr_list_01) var arr_list_mean31 = nm.mean(arr_list_31) @@ -201,13 +192,13 @@ def test_rand_exponential(): assert_almost_equal( arr_variadic_var, - 1 / 2, + 1 / 2**2, msg="Variance of exponential distribution with rate 2.0", atol=0.1, ) assert_almost_equal( arr_list_var, - 1 / 0.5, + 1 / 0.5**2, msg="Variance of exponential distribution with rate 0.5", atol=0.5, ) From 17b4c5df091012a857541810d5fb72fbdeede1f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Tue, 28 Jan 2025 23:51:53 +0100 Subject: [PATCH 12/13] Add corresponding methods --- numojo/core/ndarray.mojo | 33 ++++++++++++++++ numojo/routines/statistics/averages.mojo | 50 ++++++++++++++++++++---- 2 files changed, 75 insertions(+), 8 deletions(-) diff --git a/numojo/core/ndarray.mojo b/numojo/core/ndarray.mojo index 5e5c294e..103d6827 100644 --- a/numojo/core/ndarray.mojo +++ b/numojo/core/ndarray.mojo @@ -3423,6 +3423,25 @@ struct NDArray[dtype: DType = DType.float64]( var I = NDArray[DType.index](self.shape) sorting._sort_inplace(self, I, axis=axis) + fn std[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Compute the standard deviation. + + Args: + ddof: Delta degree of freedom. + """ + + if ddof >= self.size: + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, self.size + ) + ) + + return variance[returned_dtype](self, ddof=ddof) ** 0.5 + fn sum(self: Self) raises -> Scalar[dtype]: """ Sum of all array elements. @@ -3520,6 +3539,20 @@ struct NDArray[dtype: DType = DType.float64]( """ return self._buf.ptr + fn variance[ + returned_dtype: DType = DType.float64 + ](self, ddof: Int = 0) raises -> Scalar[returned_dtype]: + """ + Returns the variance of array. + + Parameters: + returned_dtype: The returned data type, defaulting to float64. + + Args: + ddof: Delta degree of freedom. + """ + return variance[returned_dtype](self, ddof=ddof) + # ===----------------------------------------------------------------------===# # NDArrayIterator diff --git a/numojo/routines/statistics/averages.mojo b/numojo/routines/statistics/averages.mojo index a9fc54f4..2bc4416c 100644 --- a/numojo/routines/statistics/averages.mojo +++ b/numojo/routines/statistics/averages.mojo @@ -97,7 +97,6 @@ fn mean[ """ Calculate the arithmetic average of a Matrix along the axis. - Parameters: dtype: The element type. returned_dtype: The returned data type, defaulting to float64. @@ -190,7 +189,11 @@ fn std[ """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) return variance[returned_dtype](A, ddof=ddof) ** 0.5 @@ -201,13 +204,21 @@ fn std[ """ Compute the standard deviation. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: Matrix. ddof: Delta degree of freedom. """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) return variance[returned_dtype](A, ddof=ddof) ** 0.5 @@ -218,6 +229,10 @@ fn std[ """ Compute the standard deviation along axis. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: Matrix. axis: 0 or 1. @@ -233,13 +248,21 @@ fn variance[ """ Compute the variance. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: An array. ddof: Delta degree of freedom. """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) return sum( (A.astype[returned_dtype]() - mean[returned_dtype](A)) @@ -253,13 +276,21 @@ fn variance[ """ Compute the variance. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: Matrix. ddof: Delta degree of freedom. """ if ddof >= A.size: - raise Error(String("ddof {ddof} should be smaller than size {A.size}")) + raise Error( + String("ddof {} should be smaller than size {}").format( + ddof, A.size + ) + ) return sum( (A.astype[returned_dtype]() - mean[returned_dtype](A)) @@ -273,6 +304,10 @@ fn variance[ """ Compute the variance along axis. + Parameters: + dtype: The element type. + returned_dtype: The returned data type, defaulting to float64. + Args: A: Matrix. axis: 0 or 1. @@ -281,9 +316,8 @@ fn variance[ if (ddof >= A.shape[0]) or (ddof >= A.shape[1]): raise Error( - String( - "ddof {ddof} should be smaller than size" - " {A.shape[0]}x{A.shape[1]}" + String("ddof {} should be smaller than size {}x{}").format( + ddof, A.shape[0], A.shape[1] ) ) From 09587fb4a9d2eceb3c7456c4b0d3d0040ac81be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?ZHU=20Yuhao=20=E6=9C=B1=E5=AE=87=E6=B5=A9?= Date: Wed, 29 Jan 2025 00:01:38 +0100 Subject: [PATCH 13/13] Format files --- numojo/routines/random.mojo | 2 ++ 1 file changed, 2 insertions(+) diff --git a/numojo/routines/random.mojo b/numojo/routines/random.mojo index 60cfd15a..97194094 100644 --- a/numojo/routines/random.mojo +++ b/numojo/routines/random.mojo @@ -174,10 +174,12 @@ fn rand[ """ return rand[dtype](NDArrayShape(shape), min=min, max=max) + # ===----------------------------------------------------------------------=== # # Discrete integers # ===----------------------------------------------------------------------=== # + fn randint[ dtype: DType = DType.int64 ](shape: NDArrayShape, low: Int, high: Int) raises -> NDArray[dtype]: