Skip to content

Commit

Permalink
Merge pull request #4087 from martin-frbg/lapack847
Browse files Browse the repository at this point in the history
Improve variants of Cholesky and QR (Reference-LAPACK PR 847)
  • Loading branch information
martin-frbg committed Jun 18, 2023
2 parents c3a2d40 + f524594 commit eb058c2
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 119 deletions.
2 changes: 1 addition & 1 deletion lapack-netlib/SRC/VARIANTS/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o

LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o

QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o


.PHONY: all
Expand Down
2 changes: 1 addition & 1 deletion lapack-netlib/SRC/VARIANTS/cholesky/RL/cpotrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real Hermitian
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
Expand Down
2 changes: 1 addition & 1 deletion lapack-netlib/SRC/VARIANTS/cholesky/RL/zpotrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real Hermitian
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
Expand Down
4 changes: 2 additions & 2 deletions lapack-netlib/SRC/VARIANTS/cholesky/TOP/cpotrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> CPOTRF computes the Cholesky factorization of a real symmetric
C> CPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
Expand Down Expand Up @@ -55,7 +55,7 @@
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the
Expand Down
4 changes: 2 additions & 2 deletions lapack-netlib/SRC/VARIANTS/cholesky/TOP/zpotrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> ZPOTRF computes the Cholesky factorization of a real symmetric
C> ZPOTRF computes the Cholesky factorization of a complex Hermitian
C> positive definite matrix A.
C>
C> The factorization has the form
Expand Down Expand Up @@ -55,7 +55,7 @@
C> \param[in,out] A
C> \verbatim
C> A is COMPLEX*16 array, dimension (LDA,N)
C> On entry, the symmetric matrix A. If UPLO = 'U', the leading
C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
C> N-by-N upper triangular part of A contains the upper
C> triangular part of the matrix A, and the strictly lower
C> triangular part of A is not referenced. If UPLO = 'L', the
Expand Down
13 changes: 6 additions & 7 deletions lapack-netlib/SRC/VARIANTS/qr/LL/cgeqrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> CGEQRF computes a QR factorization of a real M-by-N matrix A:
C> CGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
Expand Down Expand Up @@ -172,12 +172,11 @@ SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

Expand Down Expand Up @@ -205,13 +204,13 @@ SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for clarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

Expand All @@ -230,7 +229,7 @@ SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

Expand Down
11 changes: 5 additions & 6 deletions lapack-netlib/SRC/VARIANTS/qr/LL/dgeqrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,11 @@ SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

Expand Down Expand Up @@ -205,13 +204,13 @@ SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for dlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

Expand All @@ -230,7 +229,7 @@ SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

Expand Down
86 changes: 0 additions & 86 deletions lapack-netlib/SRC/VARIANTS/qr/LL/sceil.f

This file was deleted.

11 changes: 5 additions & 6 deletions lapack-netlib/SRC/VARIANTS/qr/LL/sgeqrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -172,12 +172,11 @@ SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

Expand Down Expand Up @@ -205,13 +204,13 @@ SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for slarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

Expand All @@ -230,7 +229,7 @@ SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

Expand Down
13 changes: 6 additions & 7 deletions lapack-netlib/SRC/VARIANTS/qr/LL/zgeqrf.f
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
C>\details \b Purpose:
C>\verbatim
C>
C> ZGEQRF computes a QR factorization of a real M-by-N matrix A:
C> ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
C> A = Q * R.
C>
C> This is the left-looking Level 3 BLAS version of the algorithm.
Expand Down Expand Up @@ -172,12 +172,11 @@ SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
INTRINSIC CEILING, MAX, MIN, REAL
* ..
* .. External Functions ..
INTEGER ILAENV
REAL SCEIL
EXTERNAL ILAENV, SCEIL
EXTERNAL ILAENV
* ..
* .. Executable Statements ..

Expand Down Expand Up @@ -205,13 +204,13 @@ SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
* So here 4 x 4 is the last T stored in the workspace
*
NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB
NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB

*
* optimal workspace = space for zlarfb + space for normal T's + space for the last T
*
LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
LLWORK = SCEIL(REAL(LLWORK)/REAL(NB))
LLWORK = CEILING(REAL(LLWORK)/REAL(NB))

IF( K.EQ.0 ) THEN

Expand All @@ -230,7 +229,7 @@ SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )

ELSE

LBWORK = SCEIL(REAL(K)/REAL(NB))*NB
LBWORK = CEILING(REAL(K)/REAL(NB))*NB
LWKOPT = (LBWORK+LLWORK-NB)*NB
WORK( 1 ) = LWKOPT

Expand Down

0 comments on commit eb058c2

Please sign in to comment.