From e62090d0ebfaf6bc67236f7232a27fc6cc98f545 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Thu, 1 Aug 2024 12:47:42 +0000 Subject: [PATCH 1/3] [AArch64][SME] Rewrite __arm_sc_memset to remove invalid instruction The implementation of __arm_sc_memset in compiler-rt contains a Neon dup instruction which is not valid in streaming mode. This patch rewrites the function to use spills & fills, or to use an SVE mov instruction if available. --- .../builtins/aarch64/sme-libc-mem-routines.S | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S index 926ad3b1b63315..a3fa59ff190033 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S +++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S @@ -252,7 +252,29 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy) #define zva_val x5 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset) - dup v0.16B, valw +# ifdef __ARM_FEATURE_SVE + mov z0.b, valw +# else + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + strb valw, [sp, #15] + strb valw, [sp, #14] + strb valw, [sp, #13] + strb valw, [sp, #12] + strb valw, [sp, #11] + strb valw, [sp, #10] + strb valw, [sp, #9] + strb valw, [sp, #8] + strb valw, [sp, #7] + strb valw, [sp, #6] + strb valw, [sp, #5] + strb valw, [sp, #4] + strb valw, [sp, #3] + strb valw, [sp, #2] + strb valw, [sp, #1] + strb valw, [sp] + ldr q0, [sp], #16 +# endif add dstend2, dstin, count cmp count, 96 From 21566dcc792233cff8cd57a8abbaaa7d8ebc243d Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 2 Aug 2024 10:04:16 +0000 Subject: [PATCH 2/3] - Rewrite copy of valw when SVE is not available --- .../builtins/aarch64/sme-libc-mem-routines.S | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S index a3fa59ff190033..b623be043d7a33 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S +++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S @@ -255,25 +255,11 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset) # ifdef __ARM_FEATURE_SVE mov z0.b, valw # else - sub sp, sp, #16 - .cfi_def_cfa_offset 16 - strb valw, [sp, #15] - strb valw, [sp, #14] - strb valw, [sp, #13] - strb valw, [sp, #12] - strb valw, [sp, #11] - strb valw, [sp, #10] - strb valw, [sp, #9] - strb valw, [sp, #8] - strb valw, [sp, #7] - strb valw, [sp, #6] - strb valw, [sp, #5] - strb valw, [sp, #4] - strb valw, [sp, #3] - strb valw, [sp, #2] - strb valw, [sp, #1] - strb valw, [sp] - ldr q0, [sp], #16 + bfi valw, valw, #8, #8 + bfi valw, valw, #16, #16 + bfi val, val, #32, #32 + fmov d0, val + fmov v0.d[1], val # endif add dstend2, dstin, count From e44c6026f7facf6f95120e8157de1f85be114e1b Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 2 Aug 2024 14:55:07 +0000 Subject: [PATCH 3/3] - Removed unnecessary whitespace --- compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S index b623be043d7a33..0318d9a6f1ebd2 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S +++ b/compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S @@ -252,15 +252,15 @@ DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy) #define zva_val x5 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset) -# ifdef __ARM_FEATURE_SVE +#ifdef __ARM_FEATURE_SVE mov z0.b, valw -# else +#else bfi valw, valw, #8, #8 bfi valw, valw, #16, #16 bfi val, val, #32, #32 fmov d0, val fmov v0.d[1], val -# endif +#endif add dstend2, dstin, count cmp count, 96