Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 115 additions & 53 deletions numojo/routines/linalg/decompositions.mojo
Original file line number Diff line number Diff line change
@@ -1,57 +1,75 @@
# ===----------------------------------------------------------------------=== #
# Decompositions
# ===----------------------------------------------------------------------=== #
from sys import simdwidthof
from algorithm import parallelize, vectorize

from algorithm import parallelize
import math as builtin_math

from numojo.core.ndarray import NDArray
from numojo.core.matrix import Matrix
from numojo.routines.creation import zeros, eye, full


fn compute_householder[
fn _compute_householder[
dtype: DType
](
mut H: Matrix[dtype], mut R: Matrix[dtype], row: Int, column: Int
) raises -> None:
var sqrt2: SIMD[dtype, 1] = 1.4142135623730951
](mut H: Matrix[dtype], mut R: Matrix[dtype], work_index: Int) raises -> None:
alias simd_width = simdwidthof[dtype]()
alias sqrt2: Scalar[dtype] = 1.4142135623730951
var rRows = R.shape[0]

for i in range(row, rRows):
var val = R._load(i, column)
H._store(i, column, val)
R._store(i, column, 0.0)
@parameter
fn load_store_vec[n_elements: Int](i: Int):
var r_value = R._load[n_elements](i + work_index, work_index)
H._store[n_elements](i + work_index, work_index, r_value)
R._store[n_elements](i + work_index, work_index, 0.0)

vectorize[load_store_vec, simd_width](rRows - work_index)

var norm = Scalar[dtype](0)

@parameter
fn calculate_norm[width: Int](i: Int):
norm += (H._load[width=width](i, work_index) ** 2).reduce_add()

vectorize[calculate_norm, simd_width](rRows)

var norm: Scalar[dtype] = 0.0
for i in range(rRows):
norm += H._load(i, column) ** 2
norm = builtin_math.sqrt(norm)
if row == rRows - 1 or norm == 0:
first_element = H._load(row, column)
R._store(row, column, -first_element)
H._store(row, column, sqrt2)
if work_index == rRows - 1 or norm == 0:
first_element = H._load(work_index, work_index)
R._store(work_index, work_index, -first_element)
H._store(work_index, work_index, sqrt2)
return

scale = 1.0 / norm
if H._load(row, column) < 0:
scale = -scale
var scaling_factor = 1.0 / norm
if H._load(work_index, work_index) < 0:
scaling_factor = -scaling_factor

R._store(row, column, -1 / scale)
R._store(work_index, work_index, -1 / scaling_factor)

for i in range(row, rRows):
H._store(i, column, H._load(i, column) * scale)
@parameter
fn scaling_factor_vec[simd_width: Int](i: Int):
H._store[simd_width](
i, work_index, H._load[simd_width](i, work_index) * scaling_factor
)

increment = H._load(row, column) + 1.0
H._store(row, column, increment)
vectorize[scaling_factor_vec, simd_width](rRows)

s = builtin_math.sqrt(1.0 / increment)
increment = H._load(work_index, work_index) + 1.0
H._store(work_index, work_index, increment)

for i in range(row, rRows):
H._store(i, column, H._load(i, column) * s)
scaling_factor = builtin_math.sqrt(1.0 / increment)

@parameter
fn scaling_factor_increment_vec[simd_width: Int](i: Int):
H._store[simd_width](
i, work_index, H._load[simd_width](i, work_index) * scaling_factor
)

vectorize[scaling_factor_increment_vec, simd_width](rRows)


fn compute_qr[
fn _apply_householder[
dtype: DType
](
mut H: Matrix[dtype],
Expand All @@ -62,15 +80,25 @@ fn compute_qr[
) raises -> None:
var aRows = A.shape[0]
var aCols = A.shape[1]

alias simdwidth = simdwidthof[dtype]()
for j in range(column_start, aCols):
var dot: SIMD[dtype, 1] = 0.0
for i in range(row_start, aRows):
dot += H._load(i, work_index) * A._load(i, j)
for i in range(row_start, aRows):
val = A._load(i, j) - H._load(i, work_index) * dot

@parameter
fn calculate_norm[width: Int](i: Int):
dot += (
H._load[width=width](i, work_index) * A._load[width=width](i, j)
).reduce_add()

vectorize[calculate_norm, simdwidth](aRows)

@parameter
fn closure[width: Int](i: Int):
val = A._load[width](i, j) - H._load[width](i, work_index) * dot
A._store(i, j, val)

vectorize[closure, simdwidth](aRows)


fn lu_decomposition[
dtype: DType
Expand Down Expand Up @@ -296,36 +324,70 @@ fn partial_pivoting[

fn qr[
dtype: DType
](owned A: Matrix[dtype]) raises -> Tuple[Matrix[dtype], Matrix[dtype]]:
](A: Matrix[dtype], mode: String = "reduced") raises -> Tuple[
Matrix[dtype], Matrix[dtype]
]:
"""
Compute the QR decomposition of a matrix.

Decompose the matrix `A` as `QR`, where `Q` is orthonormal and `R` is upper-triangular.
This function is similar to `numpy.linalg.qr`.
Computes the QR decomposition using Householder transformations. For best
performance, the input matrix should be in column-major order.

Args:
A: The input matrix to be factorized.

A: The input matrix.
mode: The mode of the decomposition. Can be "complete" or "reduced" simillar to numpy's QR decomposition.
- "complete": Returns Q and R such that A = QR, where Q is m x m and R is m x n.
- "reduced": Returns Q and R such that A = QR, where Q is m x min(m,n) and R is min(m,n) x n.
Returns:
A tuple containing the orthonormal matrix `Q` and
the upper-triangular matrix `R`.
A tuple `(Q, R)` where `Q` is orthonormal and `R` is upper-triangular.
"""
var inner: Int
var reorder: Bool = False
var reduce: Bool = False

var m = A.shape[0]
var n = A.shape[1]

var Q = Matrix.full[dtype](shape=(m, m))
for i in range(m):
Q._store(i, i, 1.0)

var min_n = min(m, n)

var H = Matrix.full[dtype](shape=(m, min_n))
if mode == "complete" or m == n:
reduce = False
inner = m
elif mode == "reduced":
reduce = True
inner = min_n
else:
raise Error(String("Invalid mode: {}").format(mode))

var R: Matrix[dtype] = A

if A.flags.C_CONTIGUOUS:
reorder = True

if reorder:
R = A.reorder_layout()
else:
R = A

var H = Matrix.zeros[dtype](shape=(m, min_n), order="F")

for i in range(min_n):
compute_householder(H, A, i, i)
compute_qr(H, i, A, i, i + 1)
_compute_householder(H, R, i)
_apply_householder(H, i, R, i, i + 1)

for i in range(min_n - 1, -1, -1):
compute_qr(H, i, Q, i, i)
var Q = Matrix.zeros[dtype]((m, inner), order="F")
for i in range(inner):
Q[i, i] = 1.0

return Q, A
for i in range(min_n - 1, -1, -1):
_apply_householder(H, i, Q, i, i)

if reorder:
Q = Q.reorder_layout()
if reduce:
R = R[:inner, :].reorder_layout()
else:
R = R.reorder_layout()
else:
if reduce:
R = R[:inner, :]

return Q^, R^
84 changes: 84 additions & 0 deletions tests/core/test_matrix.mojo
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,90 @@ def test_qr_decomposition():
assert_true(np.allclose(A_test.to_numpy(), A.to_numpy(), atol=1e-14))


def test_qr_decomposition_asym_reduced():
var np = Python.import_module("numpy")
var A = Matrix.rand[f64]((12, 5))
Q, R = nm.linalg.qr(A, mode="reduced")

assert_true(
Q.shape[0] == 12 and Q.shape[1] == 5,
"Q has unexpected shape for reduced.",
)
assert_true(
R.shape[0] == 5 and R.shape[1] == 5,
"R has unexpected shape for reduced.",
)

var id = Q.transpose() @ Q
assert_true(
np.allclose(id.to_numpy(), np.eye(Q.shape[1]), atol=1e-14),
"Q not orthonormal for reduced.",
)
assert_true(
np.allclose(R.to_numpy(), np.triu(R.to_numpy()), atol=1e-14),
"R not upper triangular for reduced.",
)

var A_test = Q @ R
assert_true(np.allclose(A_test.to_numpy(), A.to_numpy(), atol=1e-14))


def test_qr_decomposition_asym_complete():
var np = Python.import_module("numpy")
var A = Matrix.rand[f64]((12, 5))
Q, R = nm.linalg.qr(A, mode="complete")

assert_true(
Q.shape[0] == 12 and Q.shape[1] == 12,
"Q has unexpected shape for complete.",
)
assert_true(
R.shape[0] == 12 and R.shape[1] == 5,
"R has unexpected shape for complete.",
)

var id = Q.transpose() @ Q
assert_true(
np.allclose(id.to_numpy(), np.eye(Q.shape[0]), atol=1e-14),
"Q not orthonormal for complete.",
)
assert_true(
np.allclose(R.to_numpy(), np.triu(R.to_numpy()), atol=1e-14),
"R not upper triangular for complete.",
)

var A_test = Q @ R
assert_true(np.allclose(A_test.to_numpy(), A.to_numpy(), atol=1e-14))


def test_qr_decomposition_asym_complete2():
var np = Python.import_module("numpy")
var A = Matrix.rand[f64]((5, 12))
Q, R = nm.linalg.qr(A, mode="complete")

assert_true(
Q.shape[0] == 5 and Q.shape[1] == 5,
"Q has unexpected shape for complete.",
)
assert_true(
R.shape[0] == 5 and R.shape[1] == 12,
"R has unexpected shape for complete.",
)

var id = Q.transpose() @ Q
assert_true(
np.allclose(id.to_numpy(), np.eye(Q.shape[0]), atol=1e-14),
"Q not orthonormal for complete.",
)
assert_true(
np.allclose(R.to_numpy(), np.triu(R.to_numpy()), atol=1e-14),
"R not upper triangular for complete.",
)

var A_test = Q @ R
assert_true(np.allclose(A_test.to_numpy(), A.to_numpy(), atol=1e-14))


# ===-----------------------------------------------------------------------===#
# Mathematics
# ===-----------------------------------------------------------------------===#
Expand Down