From 34d76b974e92d85ae853bc9e1fe105b3af312ba2 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 15 Jul 2025 15:39:33 +0100 Subject: [PATCH 1/2] [LLVM][AArch64InstrInfo] Prevent fill folding when DstReg is SP. We can remove subreg COPY instructions by filling directly into the COPY's destination register. However, this is only valid when the copy and fill have compatible register classes. Fixes https://github.com/llvm/llvm-project/issues/148659 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 3 ++- llvm/test/CodeGen/AArch64/spill-fold.mir | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5420545cc3cec..4d243dcaf4627 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6294,7 +6294,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( FillRC = nullptr; break; case AArch64::sub_32: - FillRC = &AArch64::GPR32RegClass; + if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg))) + FillRC = &AArch64::GPR32RegClass; break; case AArch64::ssub: FillRC = &AArch64::FPR32RegClass; diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir index 0149e4504bed2..9ea9ce53b68a8 100644 --- a/llvm/test/CodeGen/AArch64/spill-fold.mir +++ b/llvm/test/CodeGen/AArch64/spill-fold.mir @@ -10,6 +10,7 @@ define i64 @test_subreg_fill_fold() { ret i64 0 } define double @test_subreg_fill_fold2() { ret double 0.0 } define <4 x float> @test_subreg_fill_fold3() { ret <4 x float> undef } + define i64 @test_subreg_fill_fold4() { ret i64 0 } define i64 @test_nzcv_spill_fold() { ret i64 0 } ... --- @@ -121,6 +122,24 @@ body: | RET_ReallyLR implicit $s0 ... --- +# CHECK-LABEL: name: test_subreg_fill_fold4 +# Ensure the COPY is maintained when its result register class is not compatible +# with the fill load's. +name: test_subreg_fill_fold4 +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr64sp } +body: | + bb.0: + %0 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + ; CHECK: %2:gpr32 = LDRWui %stack.0, 0 :: (load (s32) from %stack.0) + ; CHECK: undef %1.sub_32:gpr64sp = COPY %2 + undef %1.sub_32:gpr64sp = COPY %0 + $x0 = COPY %1 + RET_ReallyLR implicit $x0 +... +--- # CHECK-LABEL: name: test_nzcv_spill_fold # Ensure that nzcv COPY cannot be folded. name: test_nzcv_spill_fold From d8b6729bb1c44351bae2fa9d1aace09f19bf56af Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 15 Jul 2025 17:00:28 +0000 Subject: [PATCH 2/2] Ensure FillRC is always initialised. --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4d243dcaf4627..cdb224d0cd09f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6288,10 +6288,9 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // LDRWui %0:sub_32, %stack.0 // if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { - const TargetRegisterClass *FillRC; + const TargetRegisterClass *FillRC = nullptr; switch (DstMO.getSubReg()) { default: - FillRC = nullptr; break; case AArch64::sub_32: if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))