diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5420545cc3cec..cdb224d0cd09f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6288,13 +6288,13 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( // LDRWui %0:sub_32, %stack.0 // if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) { - const TargetRegisterClass *FillRC; + const TargetRegisterClass *FillRC = nullptr; switch (DstMO.getSubReg()) { default: - FillRC = nullptr; break; case AArch64::sub_32: - FillRC = &AArch64::GPR32RegClass; + if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg))) + FillRC = &AArch64::GPR32RegClass; break; case AArch64::ssub: FillRC = &AArch64::FPR32RegClass; diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir index 0149e4504bed2..9ea9ce53b68a8 100644 --- a/llvm/test/CodeGen/AArch64/spill-fold.mir +++ b/llvm/test/CodeGen/AArch64/spill-fold.mir @@ -10,6 +10,7 @@ define i64 @test_subreg_fill_fold() { ret i64 0 } define double @test_subreg_fill_fold2() { ret double 0.0 } define <4 x float> @test_subreg_fill_fold3() { ret <4 x float> undef } + define i64 @test_subreg_fill_fold4() { ret i64 0 } define i64 @test_nzcv_spill_fold() { ret i64 0 } ... --- @@ -121,6 +122,24 @@ body: | RET_ReallyLR implicit $s0 ... --- +# CHECK-LABEL: name: test_subreg_fill_fold4 +# Ensure the COPY is maintained when its result register class is not compatible +# with the fill load's. +name: test_subreg_fill_fold4 +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr64sp } +body: | + bb.0: + %0 = COPY $wzr + INLINEASM &nop, 1, 12, implicit-def dead $x0, 12, implicit-def dead $x1, 12, implicit-def dead $x2, 12, implicit-def dead $x3, 12, implicit-def dead $x4, 12, implicit-def dead $x5, 12, implicit-def dead $x6, 12, implicit-def dead $x7, 12, implicit-def dead $x8, 12, implicit-def dead $x9, 12, implicit-def dead $x10, 12, implicit-def dead $x11, 12, implicit-def dead $x12, 12, implicit-def dead $x13, 12, implicit-def dead $x14, 12, implicit-def dead $x15, 12, implicit-def dead $x16, 12, implicit-def dead $x17, 12, implicit-def dead $x18, 12, implicit-def dead $x19, 12, implicit-def dead $x20, 12, implicit-def dead $x21, 12, implicit-def dead $x22, 12, implicit-def dead $x23, 12, implicit-def dead $x24, 12, implicit-def dead $x25, 12, implicit-def dead $x26, 12, implicit-def dead $x27, 12, implicit-def dead $x28, 12, implicit-def dead $fp, 12, implicit-def dead $lr, 12, implicit-def $sp + ; CHECK: %2:gpr32 = LDRWui %stack.0, 0 :: (load (s32) from %stack.0) + ; CHECK: undef %1.sub_32:gpr64sp = COPY %2 + undef %1.sub_32:gpr64sp = COPY %0 + $x0 = COPY %1 + RET_ReallyLR implicit $x0 +... +--- # CHECK-LABEL: name: test_nzcv_spill_fold # Ensure that nzcv COPY cannot be folded. name: test_nzcv_spill_fold