From 7c4f3638fd74194cd0afc3ecc8074417d09d9b95 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 3 Oct 2024 22:00:15 +0200 Subject: [PATCH 1/2] switch PPCG4 SGEMM kernel to 4x4 --- param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/param.h b/param.h index 66eedc7980..259592cdfe 100644 --- a/param.h +++ b/param.h @@ -2243,7 +2243,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 1024 #define GEMM_DEFAULT_ALIGN 0x0ffffUL -#define SGEMM_DEFAULT_UNROLL_M 16 +#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_N 4 From d714013ab94ef45d9089fdfdded9beb32469d9a5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 3 Oct 2024 22:04:20 +0200 Subject: [PATCH 2/2] change sgemm kernel to 4x4 as the 16x4 altivec goes out of bounds --- kernel/power/KERNEL.PPCG4 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/power/KERNEL.PPCG4 b/kernel/power/KERNEL.PPCG4 index c73601cee3..0297df5973 100644 --- a/kernel/power/KERNEL.PPCG4 +++ b/kernel/power/KERNEL.PPCG4 @@ -70,13 +70,13 @@ DSCALKERNEL = scal_ppc440.S CSCALKERNEL = zscal_ppc440.S ZSCALKERNEL = zscal_ppc440.S -SGEMMKERNEL = gemm_kernel_altivec_g4.S -SGEMMINCOPY = ../generic/gemm_ncopy_16.c -SGEMMITCOPY = ../generic/gemm_tcopy_16.c +SGEMMKERNEL = gemm_kernel_g4.S +SGEMMINCOPY = +SGEMMITCOPY = SGEMMONCOPY = gemm_ncopy_4.S SGEMMOTCOPY = gemm_tcopy_4.S -SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) -SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +SGEMMINCOPYOBJ = +SGEMMITCOPYOBJ = SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMKERNEL = gemm_kernel_g4.S