diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 13a2db7a87b437..dcd4f0f65e8ef2 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1299,7 +1299,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, MIB.setMIFlag(MachineInstr::FrameDestroy); } else { // Insert the CSR spill restores with SP as the base register. - emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg, + emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, + FuncInfo->isChainFunction() ? Register() : StackPtrReg, FramePtrRegScratchCopy); } } diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir index fa62048fd31adf..bb248fe0444dbd 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -67,16 +67,24 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args - ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 + ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 + ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 + ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 - ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: $vgpr8 = COPY killed $vgpr0 + ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0 + ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: $vgpr8 = COPY killed $vgpr10 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) + ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0 @@ -104,10 +112,12 @@ body: | ; GCN-LABEL: name: dont_preserve_args ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) @@ -131,15 +141,23 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args - ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10 + ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0 + ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 + ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 @@ -168,6 +186,7 @@ body: | ; GCN-LABEL: name: dont_preserve_if_no_chain_calls ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 @@ -175,6 +194,7 @@ body: | ; GCN-NEXT: $vgpr9 = V_MOV_B32_e32 20, implicit $exec ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 30, implicit $exec ; GCN-NEXT: S_ENDPGM 0 + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir index 49001a2cfd7a65..4aea915936ffce 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir @@ -8,7 +8,6 @@ declare amdgpu_gfx void @gfx_callee() define amdgpu_cs_chain void @preserve_inactive_wwm() {ret void} - define amdgpu_cs_chain void @preserve_inactive_detected_wwm() {ret void} define amdgpu_cs_chain void @dont_preserve_wwm_if_no_chain_calls() {ret void} define amdgpu_cs_chain void @dont_preserve_wwm_if_init_whole_wave() {ret void} define amdgpu_cs_chain void @dont_preserve_non_wwm() {ret void} @@ -36,55 +35,23 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-LABEL: name: preserve_inactive_wwm - ; GCN: liveins: $sgpr0, $sgpr35 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1 - renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc - renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 - -... - -# Check that it also works for SGPR to VGPR spills. - ---- -name: preserve_inactive_detected_wwm -tracksRegLiveness: true -frameInfo: - hasTailCall: true -machineFunctionInfo: - stackPtrOffsetReg: '$sgpr32' - returnsVoid: true -body: | - bb.0: - liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 - - ; GCN-LABEL: name: preserve_inactive_detected_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 - ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 - ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 - ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9 - ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 - ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0 - ; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 + ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 - renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 - $sgpr35 = S_MOV_B32 5 - $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 - renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec - renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9 - $sgpr35 = S_MOV_B32 5 - $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0 - renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 ... @@ -110,11 +77,13 @@ body: | ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 ; GCN-NEXT: S_ENDPGM 0 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec + S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7 S_ENDPGM 0 ...