-
Notifications
You must be signed in to change notification settings - Fork 441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SORCSD2BY1 computes inaccurate result #917
Comments
The Python code below demonstrates the problem. It requires Python 3, NumPy, and a shared LAPACK library. Call it with $ python3 lapack-issue-917.py
name of LAPACK library: liblapack.so.3
tolerance: 1.69e-07
actual value: 2.97e-05 (176.0 times tolerance)
input matrix:
[[-2.0392263e-01 -9.7898704e-01]
[ 1.1427624e-08 9.2925374e-09]
[ 9.7898704e-01 -2.0392257e-01]]
reassembled matrix:
[[-2.0392254e-01 -9.7898668e-01]
[ 6.0102630e-06 2.9068287e-05]
[ 9.7898704e-01 -2.0392255e-01]]
$ python3 lapack-issue-917.py /tmp/tmp.y7q9ZOrOZA/lib/liblapack.so.3
name of LAPACK library: /tmp/tmp.y7q9ZOrOZA/lib/liblapack.so.3
tolerance: 1.69e-07
actual value: 2.97e-05 (176.0 times tolerance)
input matrix:
[[-2.0392263e-01 -9.7898704e-01]
[ 1.1427624e-08 9.2925374e-09]
[ 9.7898704e-01 -2.0392257e-01]]
reassembled matrix:
[[-2.0392254e-01 -9.7898668e-01]
[ 6.0102630e-06 2.9068287e-05]
[ 9.7898704e-01 -2.0392255e-01]]
#!/usr/bin/python3
import ctypes
import ctypes.util
from ctypes import byref, c_char, c_float, c_int32, c_void_p, POINTER
import sys
import numpy as np
import numpy.linalg as la
def main():
    """Demonstrate the inaccurate SORCSD2BY1 result from LAPACK issue #917.

    The LAPACK shared library is loaded with ctypes, either from the path
    given as the single optional command-line argument or from whatever
    ``ctypes.util.find_library("lapack")`` locates.  SORCSD2BY1 is called on
    a fixed 3-by-2 single-precision matrix, the matrix is reassembled from
    the returned factors, and the reassembly error is printed next to the
    tolerance ``eps * norm(q)``.

    Returns:
        ``None`` after a completed run, or a message string (intended to be
        handed to ``sys.exit``) when the command line is malformed or no
        LAPACK library could be located.

    Raises:
        AssertionError: if SORCSD2BY1 returns a nonzero INFO or the
            hard-coded dimensions do not produce the block structure that
            the reassembly below relies on.
    """
    if len(sys.argv) not in [1, 2]:
        return "usage: python3 %s [path to LAPACK library]" % (sys.argv[0], )

    if len(sys.argv) >= 2:
        lapack_library = sys.argv[1]
    else:
        lapack_library = ctypes.util.find_library("lapack")
        if lapack_library is None:
            # find_library() returns None when it finds nothing; stop with a
            # readable message instead of failing later on a missing symbol.
            return "error: no LAPACK library found; pass its path as argument"

    print("name of LAPACK library:", lapack_library)

    lapack = ctypes.CDLL(lapack_library, use_errno=True)

    # an alias for the _F_ortran integer type
    f_int = ctypes.c_int32
    char_p = POINTER(c_char)
    int_p = POINTER(f_int)
    float_p = POINTER(c_float)

    sorcsd2by1 = lapack.sorcsd2by1_
    sorcsd2by1.restype = None
    # Argument order of the Fortran routine:
    #   JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, X21, LDX21, THETA,
    #   U1, LDU1, U2, LDU2, V1T, LDV1T, WORK, LWORK, IWORK, INFO
    sorcsd2by1.argtypes = [
        # JOBU1, JOBU2, JOBV1T
        char_p,
        char_p,
        char_p,
        # M (rows of the whole matrix)
        int_p,
        # P (rows of X11)
        int_p,
        # Q (columns)
        int_p,
        # X11, LDX11
        float_p,
        int_p,
        # X21, LDX21
        float_p,
        int_p,
        # THETA
        float_p,
        # U1, LDU1
        float_p,
        int_p,
        # U2, LDU2
        float_p,
        int_p,
        # V1T, LDV1T
        float_p,
        int_p,
        # WORK, LWORK
        float_p,
        int_p,
        # IWORK
        int_p,
        # INFO
        int_p,
    ]

    dtype = np.float32
    eps = np.finfo(dtype).eps
    # float32 fill value: np.full() takes its dtype from the fill value, so
    # every output buffer below is single precision and starts out as NaN.
    nan = np.float32(np.nan)

    # m: rows of q, n: columns of q (LAPACK's Q), p: rows of the upper block
    # (LAPACK's P)
    m = 2 + 1
    n = 2
    p = 2
    q = np.array(
        [
            [-0.203922629, -0.978987038],
            [1.14276242E-08, 9.29253741E-09],
            [0.978987038, -0.203922570],
        ],
        dtype=np.float32,
        order="F",
    )
    # the routine receives a copy so that `q` stays available for the
    # comparison at the end
    q_copy = q.copy(order="F")
    # X21 is the lower (m - p)-row block of the same column-major array; it
    # shares the leading dimension of the full matrix.
    x21 = q_copy[p:]

    u1 = np.full([p, p], nan, order="F")
    u2 = np.full([m - p, m - p], nan, order="F")
    vt = np.full([n, n], nan, order="F")
    theta = np.full(n, nan)
    work = np.full(256, nan)
    iwork = np.full(256, 0, dtype=np.int32 if f_int == c_int32 else np.int64)
    info = f_int(0)

    yes = byref(c_char(b'y'))

    def intref(value):
        # Fortran takes every scalar by reference.
        return byref(f_int(value))

    def float_ptr(array):
        return array.ctypes.data_as(float_p)

    sorcsd2by1(
        yes,
        yes,
        yes,
        intref(m),
        # P precedes Q in the Fortran interface
        intref(p),
        intref(n),
        float_ptr(q_copy),
        intref(q_copy.shape[0]),
        float_ptr(x21),
        intref(q_copy.shape[0]),
        float_ptr(theta),
        float_ptr(u1),
        intref(u1.shape[0]),
        float_ptr(u2),
        intref(u2.shape[0]),
        float_ptr(vt),
        intref(vt.shape[0]),
        float_ptr(work),
        intref(work.shape[0]),
        iwork.ctypes.data_as(int_p),
        byref(info),
    )

    assert info.value == 0

    # The reassembly below is written for exactly this block structure.
    r = min(p, m - p, n, m - n)
    k1 = max(n + p - m, 0)
    k2 = max(n - p, 0)

    assert r == 1
    assert k1 == 1
    assert k2 == 0

    # Sigma_1 = diag(1, cos(theta)), Sigma_2 = [0, sin(theta)]
    sigma_1 = np.zeros([2, 2], dtype=np.float32)
    sigma_2 = np.zeros([1, 2], dtype=np.float32)
    sigma_1[0, 0] = 1.0
    sigma_1[1, 1] = np.cos(theta[0])
    sigma_2[0, 1] = np.sin(theta[0])

    reassembled_q11 = np.dot(u1, np.dot(sigma_1, vt))
    reassembled_q21 = np.dot(u2, np.dot(sigma_2, vt))
    reassembled_q = np.vstack([reassembled_q11, reassembled_q21])

    tol = eps * la.norm(q)
    delta = la.norm(reassembled_q - q)

    print("tolerance: %8.2e" % tol)
    print("actual value: %8.2e (%.1f times tolerance)" % (delta, delta / tol))
    print("input matrix:")
    print(q)
    print("reassembled matrix:")
    print(reassembled_q)
if __name__ == "__main__":
sys.exit(main()) |
The patch closing #634 added the assumption that the input vector [Given a vector For example when calling SORCSD2BY1 with the arguments in the first post, the SORBDB6 arguments are as follows:
|
Given the following 2x1 matrix with orthonormal columns
SORBDB5 computes the output Call the Python code below as follows (NumPy required): python3 lapack-issue-917-sorbdb5.py # uses system LAPACK
python3 lapack-issue-917-sorbdb5.py /path/to/lapack.so # uses the LAPACK library at the provided location Python code calling SORBDB5: #!/usr/bin/python3
# This file calls SORBDB5 with a unit-norm vector `x` and a matrix `Q` with a
# single unit-norm column as parameters. The vector `x` is overwritten by the
# SORBDB5 output vector `x` which is supposed to be orthogonal to the range of
# `Q`.
import ctypes
import ctypes.util
from ctypes import byref, c_char, c_float, c_int32, c_void_p, POINTER
import sys
import numpy as np
import numpy.linalg as la
def main():
    """Call SORBDB5 on a unit-norm vector and check the result.

    The LAPACK shared library is loaded with ctypes, either from the path
    given as the single optional command-line argument or from whatever
    ``ctypes.util.find_library("lapack")`` locates.  SORBDB5 receives a
    unit-norm vector ``x`` and a matrix ``Q`` with a single unit-norm column;
    it overwrites ``x`` with a vector that is supposed to be orthogonal to
    the range of ``Q``.  The script normalizes the output, measures
    ``norm(Q^T x)`` against the tolerance ``2 * eps`` and prints "ok" or a
    failure message.

    Returns:
        ``None`` after a completed run, or a message string (intended to be
        handed to ``sys.exit``) when the command line is malformed or no
        LAPACK library could be located.

    Raises:
        AssertionError: if the hand-built ``Q`` is not unit-norm to within
            ``eps`` or SORBDB5 returns a nonzero INFO.
    """
    if len(sys.argv) not in [1, 2]:
        return "usage: python3 %s [path to LAPACK library]" % (sys.argv[0], )

    if len(sys.argv) >= 2:
        lapack_library = sys.argv[1]
    else:
        lapack_library = ctypes.util.find_library("lapack")
        if lapack_library is None:
            # find_library() returns None when it finds nothing; stop with a
            # readable message instead of failing later on a missing symbol.
            return "error: no LAPACK library found; pass its path as argument"

    print("name of LAPACK library:", lapack_library)

    lapack = ctypes.CDLL(lapack_library, use_errno=True)

    # an alias for the _F_ortran integer type
    f_int = ctypes.c_int32
    int_p = POINTER(f_int)
    float_p = POINTER(c_float)

    sorbdb5 = lapack.sorbdb5_
    sorbdb5.restype = None
    # The signature has to be declared on the routine that is actually
    # called (sorbdb5_), otherwise ctypes performs no argument checking.
    sorbdb5.argtypes = [
        # M1, M2, N
        int_p,
        int_p,
        int_p,
        # X1, INCX1
        float_p,
        int_p,
        # X2, INCX2
        float_p,
        int_p,
        # Q1, LDQ1
        float_p,
        int_p,
        # Q2, LDQ2
        float_p,
        int_p,
        # WORK, LWORK
        float_p,
        int_p,
        # INFO
        int_p,
    ]

    eps = np.finfo(np.float32).eps
    # float32 fill value: np.full() takes its dtype from the fill value
    nan = np.float32(np.nan)

    # all data lives in the first block (m1 rows); the second block is empty
    m1 = 2
    m2 = 0
    n = 1
    x1 = np.array([6399 / 8192 * eps, 1 / 128 * eps], dtype=np.float32)
    x1 = x1 / la.norm(x1)
    x2 = np.array([], dtype=np.float32)
    q1 = np.array(
        [
            [1 - eps / 2.],
            [0],
        ],
        dtype=np.float32,
        order="F",
    ).reshape([2, 1])
    assert np.abs(1 - np.dot(q1.T, q1)) <= eps
    q2 = np.array([[]], dtype=np.float32, order="F").reshape((0, 1))
    work = np.full(256, nan)
    info = f_int(0)

    print("vector x")
    print(x1)
    print("matrix with orthonormal columns Q")
    print(q1)

    def intref(value):
        # Fortran takes every scalar by reference.
        return byref(f_int(value))

    def float_ptr(array):
        return array.ctypes.data_as(float_p)

    sorbdb5(
        intref(m1),
        intref(m2),
        intref(n),
        float_ptr(x1),
        intref(1),
        float_ptr(x2),
        intref(1),
        float_ptr(q1),
        intref(q1.shape[0]),
        float_ptr(q2),
        intref(max(1, q2.shape[0])),  # ldq2 must be nonnegative
        float_ptr(work),
        intref(work.shape[0]),
        byref(info),
    )

    assert info.value == 0

    # x1 was overwritten in place; normalize before measuring orthogonality
    x1 = x1 / la.norm(x1)
    tol = 2 * eps
    delta = la.norm(np.dot(q1.T, x1))

    print("tolerance: %.2e" % (tol, ))
    print("computed vector orthogonal to ran(Q):")
    print(x1)

    if delta <= tol:
        outcome = "ok"
    else:
        outcome = "FAIL (error is %.3f times epsilon)\n" % (delta / eps,)

    print()
    print(outcome)
if __name__ == "__main__":
sys.exit(main()) |
@langou, Context: xORCSD2BY1/xUNCSD2BY1 calls xORBDB6/xUNBDB6 indirectly. Given a vector
where
and 0 < α < 1 is a magic constant. The assumption is that α is sufficiently large to ensure that … Implementation: In the current code
and
Solving for β one arrives at β ≈ 0.1005 and … Next we can reverse the question and ask for a value of α subject to … Note: I do not claim that the values above maximize the required value of α; they were just derived from the input in the issue description and modified to yield nice formulas. Demo code: The script below allows you to play with the values: #!/usr/bin/python3
import numpy as np
import numpy.linalg as la
# Single-precision machine epsilon; every vector below is float32.
eps = np.finfo(np.float32).eps
# Threshold used in the early-termination test norm(y) >= alpha * norm(x).
alpha = 0.1

# beta is derived from alpha; (1 - alpha^2) appears in both terms.
one_minus_alpha_sq = 1 - alpha**2
beta = 1 / one_minus_alpha_sq * eps + alpha / np.sqrt(one_minus_alpha_sq)
# slightly grow beta to avoid failing vector norm test norm(y) / norm(x) below
beta *= 1 + eps

print("beta =", beta)

x = np.array((1, beta), dtype=np.float32)
q = np.array((1 - eps, eps), dtype=np.float32)

# q must have unit norm up to rounding
q_norm = la.norm(q)
assert np.abs(1 - q_norm) <= eps

# one projection step: remove the component of x along q
coefficient = np.dot(q.T, x)
y = x - np.dot(q, coefficient)

# check that the early termination condition is fulfilled
x_norm = la.norm(x)
y_norm = la.norm(y)
assert y_norm >= alpha * x_norm

print("norm(y) / norm(x) =", y_norm / x_norm)
print(" q^* y =", np.dot(q.T, y) / la.norm(y)) |
Thanks @ConradS. Sorry our posts criss-crossed. I was answering on the other thread at: A similar analysis for two vectors to what you are doing is done by Beresford Parlett in his book "The Symmetric Eigenvalue Problem" published in 1980. Credit for the algorithm goes to Kahan and Parlett, and the analysis for two vectors gave rise to the moniker "Twice is enough". |
(I am taking the liberty to continue the discussion here.) Thank you for this hint. I can now see in the Giraud/Langou/Rozložník paper "On the round-off error analysis of the Gram-Schmidt algorithm with reorthogonalization"
As stated earlier, xORBDB6 assumes unit-norm input vectors For this reason I think Having skimmed the GLR paper, I wonder if
where
For @langou, please let me know if and how you want me to make the orthogonalization safer. |
The input in the C code below triggers SORCSD2BY1 to return a matrix with infinite values (when the matrix should be orthogonal). The issue is present at least since v3.9.1 (commit 77a0ceb). The cause is xORBDB5 returning a vector very small in norm (much smaller than ε). This causes problems when computing Householder reflectors in SLARFGP in the xORBDB5 caller code. This subroutine does possess a branch for handling underflows but this branch is not taken because Line 200 in 8ecaaf9
The demonstration code (compile with
Starting with version 3.11, the output is
Version 3.11 accidentally fixes the issue. In version 3.11 (specifically in #647) an assumption was added to xORBDB6 that the input vector has unit norm. This is most likely not the case when xORBDB5 calls xORBDB6 for the first time in line 218 but this is the only execution path returning nonzero vectors with a very small norm. Demonstration code (NB: it calls SORCSD2BY1 instead of SORBDB3 because it conveniently forms U1 from the elementary Householder reflectors): #include <math.h>
#include <stddef.h>
#include <stdio.h>
typedef int lapack_int;
/*
 * Prototype of the Fortran LAPACK routine SORCSD2BY1 as seen from C.
 * Every argument of the Fortran interface is passed by pointer; the name
 * carries the trailing underscore under which the symbol is linked here.
 *
 * NOTE(review): the three trailing size_t parameters look like the hidden
 * character-length arguments that a Fortran compiler appends for the three
 * `char*` job flags -- confirm against the ABI of the compiler that built
 * the LAPACK library.
 */
void sorcsd2by1_(
/* job flags selecting whether U1, U2 and V1T are computed */
char* jobu1,
char* jobu2,
char* jobv1t,
/* problem dimensions */
lapack_int* m,
lapack_int* p,
lapack_int* q,
/* upper block of the input matrix and its leading dimension */
float* x11,
lapack_int* ldx11,
/* lower block of the input matrix and its leading dimension */
float* x21,
lapack_int* ldx21,
/* outputs: angles and the three factors with their leading dimensions */
float* theta,
float* u1,
lapack_int* ldu1,
float* u2,
lapack_int* ldu2,
float* v1t,
lapack_int* ldv1t,
/* workspace arrays and the length of the float workspace */
float* work,
lapack_int* lwork,
lapack_int* iwork,
/* status code returned by the routine */
lapack_int* info,
/* lengths of the three character arguments (see NOTE above) */
size_t strlen_jobu1,
size_t strlen_jobu2,
size_t strlen_jobv1t);
#define P 2
#define M (P + 1)
#define Q 2
/* Set all `count` entries of `values` to NaN so that entries the library
 * never writes remain recognizable in the output. */
static void fill_with_nan(float* values, size_t count)
{
    for (size_t i = 0; i < count; ++i) {
        values[i] = NAN;
    }
}

/*
 * Call SORCSD2BY1 on a fixed M-by-Q single-precision matrix and print the
 * computed P-by-P factor U1.
 *
 * Returns 0 if SORCSD2BY1 reported INFO = 0 and 1 otherwise; U1 is printed
 * in both cases.
 */
int main(void)
{
    lapack_int m = M;
    lapack_int p = P;
    lapack_int q = Q;
    /* column-major M-by-Q input; rows 0..P-1 form X11, the remaining rows
     * form X21, and both blocks share the leading dimension M */
    float x[M * Q] = { 0.00000000, 1.00000000,
        -1.12831231E-07, -6.37805897E-09, -1.12831231E-07,
        -1.00000000 };
    lapack_int ldx11 = m;
    lapack_int ldx21 = m;
    /* An initializer such as `= { NAN }` sets only the first element and
     * zero-fills the rest, so the output buffers are filled explicitly. */
    float theta[Q];
    float u1[P * P];
    lapack_int ldu1 = P;
    float u2[(M - P) * (M - P)];
    lapack_int ldu2 = M - P;
    float v1t[Q * Q];
    lapack_int ldv1t = Q;
    float work[256];
    lapack_int lwork = sizeof(work) / sizeof(work[0]);
    lapack_int iwork[M + P + Q];
    lapack_int info = -1;

    fill_with_nan(theta, sizeof(theta) / sizeof(theta[0]));
    fill_with_nan(u1, sizeof(u1) / sizeof(u1[0]));
    fill_with_nan(u2, sizeof(u2) / sizeof(u2[0]));
    fill_with_nan(v1t, sizeof(v1t) / sizeof(v1t[0]));
    fill_with_nan(work, sizeof(work) / sizeof(work[0]));
    for (size_t i = 0; i < sizeof(iwork) / sizeof(iwork[0]); ++i) {
        iwork[i] = -1;
    }

    //printf("X11 %+9.2e %+9.2e\n", x[0], x[ldx11 + 0]);
    //printf("X11 %+9.2e %+9.2e\n", x[1], x[ldx11 + 1]);
    //printf("X11 %+9.2e %+9.2e\n", x[2], x[ldx11 + 2]);

    char yes = 'Y';
    sorcsd2by1_(
        &yes,
        &yes,
        &yes,
        &m,
        &p,
        &q,
        x,
        &ldx11,
        x + p, /* X21 starts P rows below X11 in the same array */
        &ldx21,
        theta,
        u1,
        &ldu1,
        u2,
        &ldu2,
        v1t,
        &ldv1t,
        work,
        &lwork,
        iwork,
        &info,
        1,
        1,
        1);

    if (info != 0) {
        fprintf(stderr, "SORCSD2BY1 returned INFO = %d\n", (int)info);
    }

    /* U1 is stored column-major: entry (i, j) is u1[j * ldu1 + i] */
    printf(
        "U1 %+9.2e %+9.2e\n", u1[0], u1[1 * ldu1 + 0]);
    printf(
        "U1 %+9.2e %+9.2e\n", u1[1], u1[1 * ldu1 + 1]);

    return info == 0 ? 0 : 1;
}
Description
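Given a 2-by-1 block matrix $Q = \begin{bmatrix} Q_{11} \\ Q_{21} \end{bmatrix}$ with orthonormal columns, SORCSD2BY1 computes matrices $U_1$, $U_2$, $\Sigma_1$, $\Sigma_2$, and $V$ such that
$$
\begin{bmatrix} Q_{11} \\ Q_{21} \end{bmatrix}
= \underbrace{\begin{bmatrix} U_1 & \\ & U_2 \end{bmatrix}}_{U}
  \underbrace{\begin{bmatrix} \Sigma_1 \\ \Sigma_2 \end{bmatrix}}_{\Sigma} V^T .
$$
In commit 4174d8d, SORCSD2BY1 computes an inaccurate result with a backward error of $\|Q - U \Sigma V^T\| \approx 10^{-5}$ when it should be at most $\varepsilon \|Q\| \approx 10^{-7}$. Input with $m = 3$, $n = 2$, $p = 2$ ($Q_{11} \in \mathbb{R}^{p \times n}$, $Q_{21} \in \mathbb{R}^{(m - p) \times n}$):
Checklist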
This issue was found while working on #406.
The text was updated successfully, but these errors were encountered: