Skip to content

Commit 3ebb30b

Browse files
committed
Merge branch 'master' into inv_ex
2 parents 93b6725 + 67115cd commit 3ebb30b

File tree

5 files changed

+392
-49
lines changed

5 files changed

+392
-49
lines changed

ext/nmatrix/math.cpp

+260-19
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,14 @@ namespace nm {
195195
* Calculate the determinant for a dense matrix (A [elements]) of size 2 or 3. Return the result.
196196
*/
197197
template <typename DType>
198-
void det_exact(const int M, const void* A_elements, const int lda, void* result_arg) {
198+
void det_exact_from_dense(const int M, const void* A_elements, const int lda, void* result_arg) {
199199
DType* result = reinterpret_cast<DType*>(result_arg);
200200
const DType* A = reinterpret_cast<const DType*>(A_elements);
201201

202202
typename LongDType<DType>::type x, y;
203203

204204
if (M == 2) {
205205
*result = A[0] * A[lda+1] - A[1] * A[lda];
206-
207206
} else if (M == 3) {
208207
x = A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]; // ei - fh
209208
y = A[lda] * A[2*lda+2] - A[lda+2] * A[2*lda]; // fg - di
@@ -220,9 +219,99 @@ namespace nm {
220219

221220
//we can't do det_exact on byte, because it will want to return a byte (unsigned), but determinants can be negative, even if all elements of the matrix are positive
222221
template <>
223-
void det_exact<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
222+
void det_exact_from_dense<uint8_t>(const int M, const void* A_elements, const int lda, void* result_arg) {
224223
rb_raise(nm_eDataTypeError, "cannot call det_exact on unsigned type");
225224
}
225+
/*
226+
* Calculate the determinant for a yale matrix (storage) of size 2 or 3. Return the result.
227+
*/
228+
template <typename DType>
229+
void det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda, void* result_arg) {
230+
DType* result = reinterpret_cast<DType*>(result_arg);
231+
IType* ija = reinterpret_cast<IType *>(storage->ija);
232+
DType* a = reinterpret_cast<DType*>(storage->a);
233+
IType col_pos = storage->shape[0] + 1;
234+
if (M == 2) {
235+
if (ija[2] - ija[0] == 2) {
236+
*result = a[0] * a[1] - a[col_pos] * a[col_pos+1];
237+
}
238+
else { *result = a[0] * a[1]; }
239+
} else if (M == 3) {
240+
DType m[3][3];
241+
for (int i = 0; i < 3; ++i) {
242+
m[i][i] = a[i];
243+
switch(ija[i+1] - ija[i]) {
244+
case 2:
245+
m[i][ija[col_pos]] = a[col_pos];
246+
m[i][ija[col_pos+1]] = a[col_pos+1];
247+
col_pos += 2;
248+
break;
249+
case 1:
250+
m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
251+
m[i][ija[col_pos]] = a[col_pos];
252+
++col_pos;
253+
break;
254+
case 0:
255+
m[i][(i+1)%3] = m[i][(i+2)%3] = 0;
256+
break;
257+
default:
258+
rb_raise(rb_eArgError, "some value in IJA is incorrect!");
259+
}
260+
}
261+
*result =
262+
m[0][0] * m[1][1] * m[2][2] + m[0][1] * m[1][2] * m[2][0] + m[0][2] * m[1][0] * m[2][1]
263+
- m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[0][2] * m[1][1] * m[2][0];
264+
265+
} else if (M < 2) {
266+
rb_raise(rb_eArgError, "can only calculate exact determinant of a square matrix of size 2 or larger");
267+
} else {
268+
rb_raise(rb_eNotImpError, "exact determinant calculation needed for matrices larger than 3x3");
269+
}
270+
}
271+
272+
/*
273+
* Solve a system of linear equations using forward-substution followed by
274+
* back substution from the LU factorization of the matrix of co-efficients.
275+
* Replaces x_elements with the result. Works only with non-integer, non-object
276+
* data types.
277+
*
278+
* args - r -> The number of rows of the matrix.
279+
* lu_elements -> Elements of the LU decomposition of the co-efficients
280+
* matrix, as a contiguos array.
281+
* b_elements -> Elements of the the right hand sides, as a contiguous array.
282+
* x_elements -> The array that will contain the results of the computation.
283+
* pivot -> Positions of permuted rows.
284+
*/
285+
template <typename DType>
286+
void solve(const int r, const void* lu_elements, const void* b_elements, void* x_elements, const int* pivot) {
287+
int ii = 0, ip;
288+
DType sum;
289+
290+
const DType* matrix = reinterpret_cast<const DType*>(lu_elements);
291+
const DType* b = reinterpret_cast<const DType*>(b_elements);
292+
DType* x = reinterpret_cast<DType*>(x_elements);
293+
294+
for (int i = 0; i < r; ++i) { x[i] = b[i]; }
295+
for (int i = 0; i < r; ++i) { // forward substitution loop
296+
ip = pivot[i];
297+
sum = x[ip];
298+
x[ip] = x[i];
299+
300+
if (ii != 0) {
301+
for (int j = ii - 1;j < i; ++j) { sum = sum - matrix[i * r + j] * x[j]; }
302+
}
303+
else if (sum != 0.0) {
304+
ii = i + 1;
305+
}
306+
x[i] = sum;
307+
}
308+
309+
for (int i = r - 1; i >= 0; --i) { // back substitution loop
310+
sum = x[i];
311+
for (int j = i + 1; j < r; j++) { sum = sum - matrix[i * r + j] * x[j]; }
312+
x[i] = sum/matrix[i * r + i];
313+
}
314+
}
226315

227316
/*
228317
* Calculates in-place inverse of A_elements. Uses Gauss-Jordan elimination technique.
@@ -375,20 +464,24 @@ namespace nm {
375464
delete[] u;
376465
}
377466

467+
void raise_not_invertible_error() {
468+
rb_raise(nm_eNotInvertibleError,
469+
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
470+
}
471+
378472
/*
379473
* Calculate the exact inverse for a dense matrix (A [elements]) of size 2 or 3. Places the result in B_elements.
380474
*/
381475
template <typename DType>
382-
void inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb) {
476+
void inverse_exact_from_dense(const int M, const void* A_elements,
477+
const int lda, void* B_elements, const int ldb) {
478+
383479
const DType* A = reinterpret_cast<const DType*>(A_elements);
384480
DType* B = reinterpret_cast<DType*>(B_elements);
385481

386482
if (M == 2) {
387483
DType det = A[0] * A[lda+1] - A[1] * A[lda];
388-
if (det == 0) {
389-
rb_raise(nm_eNotInvertibleError,
390-
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
391-
}
484+
if (det == 0) { raise_not_invertible_error(); }
392485
B[0] = A[lda+1] / det;
393486
B[1] = -A[1] / det;
394487
B[ldb] = -A[lda] / det;
@@ -397,11 +490,8 @@ namespace nm {
397490
} else if (M == 3) {
398491
// Calculate the exact determinant.
399492
DType det;
400-
det_exact<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
401-
if (det == 0) {
402-
rb_raise(nm_eNotInvertibleError,
403-
"matrix must have non-zero determinant to be invertible (not getting this error does not mean matrix is invertible if you're dealing with floating points)");
404-
}
493+
det_exact_from_dense<DType>(M, A_elements, lda, reinterpret_cast<void*>(&det));
494+
if (det == 0) { raise_not_invertible_error(); }
405495

406496
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
407497
B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
@@ -419,6 +509,113 @@ namespace nm {
419509
}
420510
}
421511

512+
template <typename DType>
513+
void inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
514+
const int lda, YALE_STORAGE* inverse, const int ldb) {
515+
516+
// inverse is a clone of storage
517+
const DType* a = reinterpret_cast<const DType*>(storage->a);
518+
const IType* ija = reinterpret_cast<const IType *>(storage->ija);
519+
DType* b = reinterpret_cast<DType*>(inverse->a);
520+
IType* ijb = reinterpret_cast<IType *>(inverse->ija);
521+
IType col_pos = storage->shape[0] + 1;
522+
// Calculate the exact determinant.
523+
DType det;
524+
525+
if (M == 2) {
526+
IType ndnz = ija[2] - ija[0];
527+
if (ndnz == 2) {
528+
det = a[0] * a[1] - a[col_pos] * a[col_pos+1];
529+
}
530+
else { det = a[0] * a[1]; }
531+
if (det == 0) { raise_not_invertible_error(); }
532+
b[0] = a[1] / det;
533+
b[1] = -a[0] / det;
534+
if (ndnz == 2) {
535+
b[col_pos] = -a[col_pos] / det;
536+
b[col_pos+1] = -a[col_pos+1] / det;
537+
}
538+
else if (ndnz == 1) {
539+
b[col_pos] = -a[col_pos] / det;
540+
}
541+
542+
} else if (M == 3) {
543+
DType *A = new DType[lda*3];
544+
for (int i = 0; i < lda; ++i) {
545+
A[i*3+i] = a[i];
546+
switch (ija[i+1] - ija[i]) {
547+
case 2:
548+
A[i*3 + ija[col_pos]] = a[col_pos];
549+
A[i*3 + ija[col_pos+1]] = a[col_pos+1];
550+
col_pos += 2;
551+
break;
552+
case 1:
553+
A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
554+
A[i*3 + ija[col_pos]] = a[col_pos];
555+
col_pos += 1;
556+
break;
557+
case 0:
558+
A[i*3 + (i+1)%3] = A[i*3 + (i+2)%3] = 0;
559+
break;
560+
default:
561+
rb_raise(rb_eArgError, "some value in IJA is incorrect!");
562+
}
563+
}
564+
det =
565+
A[0] * A[lda+1] * A[2*lda+2] + A[1] * A[lda+2] * A[2*lda] + A[2] * A[lda] * A[2*lda+1]
566+
- A[0] * A[lda+2] * A[2*lda+1] - A[1] * A[lda] * A[2*lda+2] - A[2] * A[lda+1] * A[2*lda];
567+
if (det == 0) { raise_not_invertible_error(); }
568+
569+
DType *B = new DType[3*ldb];
570+
B[0] = ( A[lda+1] * A[2*lda+2] - A[lda+2] * A[2*lda+1]) / det; // A = ei - fh
571+
B[1] = (- A[1] * A[2*lda+2] + A[2] * A[2*lda+1]) / det; // D = -bi + ch
572+
B[2] = ( A[1] * A[lda+2] - A[2] * A[lda+1]) / det; // G = bf - ce
573+
B[ldb] = (- A[lda] * A[2*lda+2] + A[lda+2] * A[2*lda]) / det; // B = -di + fg
574+
B[ldb+1] = ( A[0] * A[2*lda+2] - A[2] * A[2*lda]) / det; // E = ai - cg
575+
B[ldb+2] = (- A[0] * A[lda+2] + A[2] * A[lda]) / det; // H = -af + cd
576+
B[2*ldb] = ( A[lda] * A[2*lda+1] - A[lda+1] * A[2*lda]) / det; // C = dh - eg
577+
B[2*ldb+1]= ( -A[0] * A[2*lda+1] + A[1] * A[2*lda]) / det; // F = -ah + bg
578+
B[2*ldb+2]= ( A[0] * A[lda+1] - A[1] * A[lda]) / det; // I = ae - bd
579+
580+
// Calculate the size of ijb and b, then reallocate them.
581+
IType ndnz = 0;
582+
for (int i = 0; i < 3; ++i) {
583+
for (int j = 0; j < 3; ++j) {
584+
if (j != i && B[i*ldb + j] != 0) { ++ndnz; }
585+
}
586+
}
587+
inverse->ndnz = ndnz;
588+
col_pos = 4; // shape[0] + 1
589+
inverse->capacity = 4 + ndnz;
590+
NM_REALLOC_N(inverse->a, DType, 4 + ndnz);
591+
NM_REALLOC_N(inverse->ija, IType, 4 + ndnz);
592+
b = reinterpret_cast<DType*>(inverse->a);
593+
ijb = reinterpret_cast<IType *>(inverse->ija);
594+
595+
for (int i = 0; i < 3; ++i) {
596+
ijb[i] = col_pos;
597+
for (int j = 0; j < 3; ++j) {
598+
if (j == i) {
599+
b[i] = B[i*ldb + j];
600+
}
601+
else if (B[i*ldb + j] != 0) {
602+
b[col_pos] = B[i*ldb + j];
603+
ijb[col_pos] = j;
604+
++col_pos;
605+
}
606+
}
607+
}
608+
b[3] = 0;
609+
ijb[3] = col_pos;
610+
delete [] B;
611+
delete [] A;
612+
} else if (M == 1) {
613+
b[0] = 1 / a[0];
614+
} else {
615+
rb_raise(rb_eNotImpError, "exact inverse calculation needed for matrices larger than 3x3");
616+
}
617+
}
618+
422619
/*
423620
* Function signature conversion for calling CBLAS' gemm functions as directly as possible.
424621
*
@@ -1068,14 +1265,43 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
10681265

10691266

10701267
/*
1071-
* C accessor for calculating an exact determinant.
1268+
* C accessor for calculating an exact determinant. Dense matrix version.
10721269
*/
1073-
void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result) {
1074-
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact, void, const int M, const void* A_elements, const int lda, void* result_arg);
1270+
void nm_math_det_exact_from_dense(const int M, const void* elements, const int lda,
1271+
nm::dtype_t dtype, void* result) {
1272+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_dense, void, const int M,
1273+
const void* A_elements, const int lda, void* result_arg);
10751274

10761275
ttable[dtype](M, elements, lda, result);
10771276
}
10781277

1278+
/*
1279+
* C accessor for calculating an exact determinant. Yale matrix version.
1280+
*/
1281+
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage, const int lda,
1282+
nm::dtype_t dtype, void* result) {
1283+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::det_exact_from_yale, void, const int M,
1284+
const YALE_STORAGE* storage, const int lda, void* result_arg);
1285+
1286+
ttable[dtype](M, storage, lda, result);
1287+
}
1288+
1289+
/*
1290+
* C accessor for solving a system of linear equations.
1291+
*/
1292+
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv) {
1293+
int* pivot = new int[RARRAY_LEN(ipiv)];
1294+
1295+
for (int i = 0; i < RARRAY_LEN(ipiv); ++i) {
1296+
pivot[i] = FIX2INT(rb_ary_entry(ipiv, i));
1297+
}
1298+
1299+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::solve, void, const int, const void*, const void*, void*, const int*);
1300+
1301+
ttable[NM_DTYPE(x)](NM_SHAPE0(b), NM_STORAGE_DENSE(lu)->elements,
1302+
NM_STORAGE_DENSE(b)->elements, NM_STORAGE_DENSE(x)->elements, pivot);
1303+
}
1304+
10791305
/*
10801306
* C accessor for reducing a matrix to hessenberg form.
10811307
*/
@@ -1100,14 +1326,29 @@ void nm_math_inverse(const int M, void* a_elements, nm::dtype_t dtype) {
11001326
}
11011327

11021328
/*
1103-
* C accessor for calculating an exact inverse.
1329+
* C accessor for calculating an exact inverse. Dense matrix version.
11041330
*/
1105-
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) {
1106-
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact, void, const int, const void*, const int, void*, const int);
1331+
void nm_math_inverse_exact_from_dense(const int M, const void* A_elements,
1332+
const int lda, void* B_elements, const int ldb, nm::dtype_t dtype) {
1333+
1334+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_dense, void,
1335+
const int, const void*, const int, void*, const int);
11071336

11081337
ttable[dtype](M, A_elements, lda, B_elements, ldb);
11091338
}
11101339

1340+
/*
1341+
* C accessor for calculating an exact inverse. Yale matrix version.
1342+
*/
1343+
void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
1344+
const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype) {
1345+
1346+
NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::inverse_exact_from_yale, void,
1347+
const int, const YALE_STORAGE*, const int, YALE_STORAGE*, const int);
1348+
1349+
ttable[dtype](M, storage, lda, inverse, ldb);
1350+
}
1351+
11111352
/*
11121353
* Transpose an array of elements that represent a row-major dense matrix. Does not allocate anything, only does an memcpy.
11131354
*/

ext/nmatrix/math/math.h

+8-2
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,14 @@ extern "C" {
102102
void nm_math_solve(VALUE lu, VALUE b, VALUE x, VALUE ipiv);
103103
void nm_math_inverse(const int M, void* A_elements, nm::dtype_t dtype);
104104
void nm_math_hessenberg(VALUE a);
105-
void nm_math_det_exact(const int M, const void* elements, const int lda, nm::dtype_t dtype, void* result);
106-
void nm_math_inverse_exact(const int M, const void* A_elements, const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
105+
void nm_math_det_exact_from_dense(const int M, const void* elements,
106+
const int lda, nm::dtype_t dtype, void* result);
107+
void nm_math_det_exact_from_yale(const int M, const YALE_STORAGE* storage,
108+
const int lda, nm::dtype_t dtype, void* result);
109+
void nm_math_inverse_exact_from_dense(const int M, const void* A_elements,
110+
const int lda, void* B_elements, const int ldb, nm::dtype_t dtype);
111+
void nm_math_inverse_exact_from_yale(const int M, const YALE_STORAGE* storage,
112+
const int lda, YALE_STORAGE* inverse, const int ldb, nm::dtype_t dtype);
107113
}
108114

109115

0 commit comments

Comments
 (0)