Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 51 additions & 29 deletions numojo/core/ndarray.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@ import builtin.bool as builtin_bool
from builtin.type_aliases import Origin
from collections import Dict
from collections.optional import Optional
from math import log10
from memory import UnsafePointer, memset_zero, memcpy
from python import Python, PythonObject
from math import log10
from python import PythonObject
from sys import simdwidthof
from tensor import Tensor
from utils import Variant
from utils.numerics import isnan, isinf

import numojo.core._array_funcs as _af
from numojo.core._math_funcs import Vectorized
from numojo.core.datatypes import TypeCoercion, _concise_dtype_str
from numojo.core.item import Item
from numojo.core.ndshape import NDArrayShape
from numojo.core.ndstrides import NDArrayStrides
from numojo.core.item import Item
from numojo.core.own_data import OwnData
from numojo.core._math_funcs import Vectorized
from numojo.core.utility import (
_get_offset,
_update_flags,
Expand All @@ -36,32 +36,24 @@ from numojo.core.utility import (
to_numpy,
to_tensor,
bool_to_numeric,
is_floattype,
)

import numojo.routines.bitwise as bitwise
import numojo.routines.creation as creation
from numojo.routines.io.formatting import (
format_floating_precision,
format_floating_scientific,
format_value,
PrintOptions,
printoptions,
GLOBAL_PRINT_OPTIONS,
)
import numojo.routines.creation as creation
import numojo.routines.creation as creation
import numojo.routines.sorting as sorting
import numojo.routines.math.arithmetic as arithmetic
import numojo.routines.logic.comparison as comparison
import numojo.routines.math.rounding as rounding
import numojo.routines.bitwise as bitwise
import numojo.routines.linalg as linalg
from numojo.core.datatypes import TypeCoercion, _concise_dtype_str
from numojo.routines.statistics.averages import mean
from numojo.routines.math.products import prod, cumprod
from numojo.routines.math.sums import sum, cumsum
from numojo.routines.logic.truth import any
from numojo.routines.linalg.products import matmul
import numojo.routines.logic.comparison as comparison
from numojo.routines.manipulation import reshape, ravel
import numojo.routines.math.arithmetic as arithmetic
from numojo.routines.math.products import prod, cumprod
import numojo.routines.math.rounding as rounding
from numojo.routines.math.sums import sum, cumsum
import numojo.routines.sorting as sorting
from numojo.routines.statistics.averages import mean

# ===----------------------------------------------------------------------===#
# NDArray
Expand Down Expand Up @@ -3426,19 +3418,33 @@ struct NDArray[dtype: DType = DType.float64](
](self, ddof: Int = 0) raises -> Scalar[returned_dtype]:
"""
Compute the standard deviation.
See `numojo.std`.

Parameters:
returned_dtype: The returned data type, defaulting to float64.

Args:
ddof: Delta degree of freedom.
"""

if ddof >= self.size:
raise Error(
String("ddof {} should be smaller than size {}").format(
ddof, self.size
)
)
return std[returned_dtype](self, ddof=ddof)

return variance[returned_dtype](self, ddof=ddof) ** 0.5
fn std[
returned_dtype: DType = DType.float64
](self, axis: Int, ddof: Int = 0) raises -> NDArray[returned_dtype]:
"""
Compute the standard deviation along the axis.
See `numojo.std`.

Parameters:
returned_dtype: The returned data type, defaulting to float64.

Args:
axis: The axis along which the mean is performed.
ddof: Delta degree of freedom.
"""

return std[returned_dtype](self, axis=axis, ddof=ddof)

fn sum(self: Self) raises -> Scalar[dtype]:
"""
Expand Down Expand Up @@ -3551,6 +3557,22 @@ struct NDArray[dtype: DType = DType.float64](
"""
return variance[returned_dtype](self, ddof=ddof)

fn variance[
returned_dtype: DType = DType.float64
](self, axis: Int, ddof: Int = 0) raises -> NDArray[returned_dtype]:
"""
Returns the variance of array along the axis.
See `numojo.variance`.

Parameters:
returned_dtype: The returned data type, defaulting to float64.

Args:
axis: The axis along which the mean is performed.
ddof: Delta degree of freedom.
"""
return variance[returned_dtype](self, axis=axis, ddof=ddof)


# ===----------------------------------------------------------------------===#
# NDArrayIterator
Expand Down
45 changes: 44 additions & 1 deletion numojo/routines/manipulation.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ fn broadcast_to[
# We compare the shape from the trailing dimensions.

var b_strides = NDArrayStrides(
shape
ndim=len(shape), initialized=False
) # Strides of b when refer to data of a

for i in range(a.shape.ndim):
Expand Down Expand Up @@ -418,6 +418,49 @@ fn broadcast_to[
return B^


fn _broadcast_back_to[
dtype: DType
](a: NDArray[dtype], shape: NDArrayShape, axis: Int) raises -> NDArray[dtype]:
"""
Broadcasts the array back to the given shape.
If array `b` is the result of array `a` operated along an axis,
it has one dimension less than `a`.
This function can broadcast `b` back to the shape of `a`.
It is a temporary function and should not be used by users.
When `OwnData` is supported, this function will be removed.
Whether broadcasting is possible or not is not checked.
"""

var a_shape = shape
a_shape[axis] = 1

var b_strides = NDArrayStrides(
a_shape
) # Strides of b when refer to data of a
b_strides[axis] = 0

# Start broadcasting.

var b = NDArray[dtype](shape) # Construct array of targeted shape.

# Iterate all items in the new array and fill in correct values.
for offset in range(b.size):
var remainder = offset
var indices = Item(ndim=b.ndim, initialized=False)

for i in range(b.ndim):
indices[i], remainder = divmod(
remainder,
b.strides[i],
)

(b._buf.ptr + offset).init_pointee_copy(
a._buf.ptr[_get_offset(indices, b_strides)]
)

return b^


# ===----------------------------------------------------------------------=== #
# Rearranging elements
# ===----------------------------------------------------------------------=== #
Expand Down
113 changes: 113 additions & 0 deletions numojo/routines/statistics/averages.mojo
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# ===----------------------------------------------------------------------=== #
# Distributed under the Apache 2.0 License with LLVM Exceptions.
# See LICENSE and the LLVM License for more information.
# https://github.com/Mojo-Numerics-and-Algorithms-group/NuMojo/blob/main/LICENSE
# https://llvm.org/LICENSE.txt
# ===----------------------------------------------------------------------=== #
"""
Averages and variances
"""
Expand All @@ -13,6 +19,7 @@ import numojo.core.matrix as matrix
from numojo.core.matrix import Matrix
from numojo.core.utility import bool_to_numeric
from numojo.routines.logic.comparison import greater, less
from numojo.routines.manipulation import broadcast_to, _broadcast_back_to
from numojo.routines.math.arithmetic import add
from numojo.routines.math.sums import sum, cumsum
import numojo.routines.math.misc as misc
Expand Down Expand Up @@ -183,6 +190,10 @@ fn std[
"""
Compute the standard deviation.

Parameters:
dtype: The element type.
returned_dtype: The returned data type, defaulting to float64.

Args:
A: An array.
ddof: Delta degree of freedom.
Expand All @@ -198,6 +209,48 @@ fn std[
return variance[returned_dtype](A, ddof=ddof) ** 0.5


fn std[
dtype: DType, //, returned_dtype: DType = DType.float64
](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[
returned_dtype
]:
"""
Computes the standard deviation along the axis.

Parameters:
dtype: The element type.
returned_dtype: The returned data type, defaulting to float64.

Args:
A: An array.
axis: The axis along which the mean is performed.
ddof: Delta degree of freedom.

Returns:
An array.

Raises:
Error: If the axis is out of bounds.
Error: If ddof is not smaller than the size of the axis.
"""

var normalized_axis = axis
if normalized_axis < 0:
normalized_axis += A.ndim
if (normalized_axis >= A.ndim) or (normalized_axis < 0):
raise Error(String("Axis {} out of bounds!").format(axis))

for i in range(A.ndim):
if ddof >= A.shape[i]:
raise Error(
String(
"ddof ({}) should be smaller than size ({}) of axis ({})"
).format(ddof, A.shape[i], i)
)

return variance[returned_dtype](A, axis=normalized_axis, ddof=ddof) ** 0.5


fn std[
dtype: DType, //, returned_dtype: DType = DType.float64
](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]:
Expand Down Expand Up @@ -270,6 +323,66 @@ fn variance[
) / (A.size - ddof)


fn variance[
dtype: DType, //, returned_dtype: DType = DType.float64
](A: NDArray[dtype], axis: Int, ddof: Int = 0) raises -> NDArray[
returned_dtype
]:
"""
Computes the variance along the axis.

Parameters:
dtype: The element type.
returned_dtype: The returned data type, defaulting to float64.

Args:
A: An array.
axis: The axis along which the mean is performed.
ddof: Delta degree of freedom.

Returns:
An array.

Raises:
Error: If the axis is out of bounds.
Error: If ddof is not smaller than the size of the axis.
"""

var normalized_axis = axis
if normalized_axis < 0:
normalized_axis += A.ndim
if (normalized_axis >= A.ndim) or (normalized_axis < 0):
raise Error(String("Axis {} out of bounds!").format(axis))

for i in range(A.ndim):
if ddof >= A.shape[i]:
raise Error(
String(
"ddof ({}) should be smaller than size ({}) of axis ({})"
).format(ddof, A.shape[i], i)
)

return sum(
(
A.astype[returned_dtype]()
- _broadcast_back_to(
mean[returned_dtype](A, axis=normalized_axis),
A.shape,
axis=normalized_axis,
)
)
* (
A.astype[returned_dtype]()
- _broadcast_back_to(
mean[returned_dtype](A, axis=normalized_axis),
A.shape,
axis=normalized_axis,
)
),
axis=normalized_axis,
) / (A.shape[normalized_axis] - ddof)


fn variance[
dtype: DType, //, returned_dtype: DType = DType.float64
](A: Matrix[dtype], ddof: Int = 0) raises -> Scalar[returned_dtype]:
Expand Down
16 changes: 14 additions & 2 deletions tests/routines/test_statistics.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ from utils_for_test import check, check_is_close

def test_mean_median_var_std():
var np = Python.import_module("numpy")
var A = nm.random.randn(10, 10)
var A = nm.random.randn(3, 4, 5)
var Anp = A.to_numpy()

assert_true(
np.all(np.isclose(nm.mean(A), np.mean(Anp), atol=0.1)),
"`mean` is broken",
)
for i in range(2):
for i in range(3):
check_is_close(
nm.mean(A, i),
np.mean(Anp, i),
Expand All @@ -35,8 +35,20 @@ def test_mean_median_var_std():
np.all(np.isclose(nm.variance(A), np.`var`(Anp), atol=0.1)),
"`variance` is broken",
)
for i in range(3):
check_is_close(
nm.variance(A, i),
np.`var`(Anp, i),
String("`variance` is broken for {}-dimension".format(i)),
)

assert_true(
np.all(np.isclose(nm.std(A), np.std(Anp), atol=0.1)),
"`std` is broken",
)
for i in range(3):
check_is_close(
nm.std(A, i),
np.std(Anp, i),
String("`std` is broken for {}-dimension".format(i)),
)