Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 5 additions & 37 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7333,44 +7333,12 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
return CreatedBB;
}

// Legalize REG_SEQUENCE and PHI
// The register class of the operands much be the same type as the register
// Legalize PHI
// The register class of the operands must be the same type as the register
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't actually true, though it may be convenient. It's not "legalization" and probably belongs somewhere else?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK but my patch does not change this, and there's only so much code I'm prepared to rewrite to fix a relatively minor issue to do with the choice of a/v/av classes.

As I said in the other PR, I think this is required to implement temporal divergence.

// class of the output.
if (MI.getOpcode() == AMDGPU::PHI) {
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
continue;
const TargetRegisterClass *OpRC =
MRI.getRegClass(MI.getOperand(i).getReg());
if (RI.hasVectorRegisters(OpRC)) {
VRC = OpRC;
} else {
SRC = OpRC;
}
}

// If any of the operands are VGPR registers, then they all most be
// otherwise we will create illegal VGPR->SGPR copies when legalizing
// them.
if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) {
if (!VRC) {
assert(SRC);
if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
VRC = &AMDGPU::VReg_1RegClass;
} else
VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(SRC)
: RI.getEquivalentVGPRClass(SRC);
} else {
VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(VRC)
: RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
RC = SRC;
}
const TargetRegisterClass *VRC = getOpRegClass(MI, 0);
assert(!RI.isSGPRClass(VRC));

// Update all the operands so they have the same type.
for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
Expand All @@ -7384,7 +7352,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,

// Avoid creating no-op copies with the same src and dst reg class. These
// confuse some of the machine passes.
legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
legalizeGenericOperand(*InsertBB, Insert, VRC, Op, MRI, MI.getDebugLoc());
}
}

Expand Down
602 changes: 307 additions & 295 deletions llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll

Large diffs are not rendered by default.

77 changes: 37 additions & 40 deletions llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,16 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: .LBB0_1: ; %Flow9
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[24:25]
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
; CHECK-NEXT: s_cbranch_vccz .LBB0_17
; CHECK-NEXT: .LBB0_2: ; %._crit_edge1942.i.i.i3548
; CHECK-NEXT: ; =>This Loop Header: Depth=1
; CHECK-NEXT: ; Child Loop BB0_7 Depth 2
; CHECK-NEXT: ; Child Loop BB0_6 Depth 2
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
; CHECK-NEXT: s_cbranch_vccnz .LBB0_9
; CHECK-NEXT: ; %bb.3: ; %.preheader1868.i.i.i3244
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
; CHECK-NEXT: s_cbranch_vccz .LBB0_12
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
; CHECK-NEXT: ; %bb.4: ; %.preheader1855.i.i.i3329.preheader
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15]
Expand Down Expand Up @@ -86,54 +85,49 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29]
; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[18:19]
; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
; CHECK-NEXT: s_branch .LBB0_7
; CHECK-NEXT: .LBB0_5: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: .LBB0_6: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_5: ; %Flow
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
; CHECK-NEXT: s_cbranch_vccnz .LBB0_13
; CHECK-NEXT: .LBB0_7: ; %.preheader1855.i.i.i3329
; CHECK-NEXT: s_cbranch_vccnz .LBB0_11
; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329
; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
; CHECK-NEXT: v_accvgpr_read_b32 v27, a1
; CHECK-NEXT: v_accvgpr_read_b32 v26, a0
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.8: ; %.lr.ph2070.i.i.i3291
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
; CHECK-NEXT: s_cbranch_vccz .LBB0_10
; CHECK-NEXT: ; %bb.9: ; %.preheader1856.preheader.i.i.i3325
; CHECK-NEXT: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325
; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v28
; CHECK-NEXT: s_mov_b64 s[24:25], 0
; CHECK-NEXT: v_accvgpr_write_b32 a1, v29
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_7 Depth=2
; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11]
; CHECK-NEXT: s_branch .LBB0_5
; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[22:23], 0
; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11]
; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21]
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_branch .LBB0_15
; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
; CHECK-NEXT: v_mov_b64_e32 v[24:25], v[30:31]
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_13: ; %loop.exit.guard
; CHECK-NEXT: s_branch .LBB0_15
; CHECK-NEXT: .LBB0_11: ; %loop.exit.guard
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_and_b64 vcc, exec, s[24:25]
; CHECK-NEXT: s_cbranch_vccz .LBB0_15
; CHECK-NEXT: ; %bb.14: ; %._crit_edge2105.i.i.i2330.loopexit
; CHECK-NEXT: s_cbranch_vccz .LBB0_13
; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[26:27]
; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17]
Expand All @@ -145,21 +139,24 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
; CHECK-NEXT: s_cselect_b32 s23, s23, 0
; CHECK-NEXT: s_cselect_b32 s22, s22, 0
; CHECK-NEXT: s_mov_b64 s[8:9], -1
; CHECK-NEXT: s_branch .LBB0_16
; CHECK-NEXT: .LBB0_15: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_branch .LBB0_14
; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[8:9], 0
; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
; CHECK-NEXT: .LBB0_16: ; %Flow6
; CHECK-NEXT: .LBB0_14: ; %Flow6
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
; CHECK-NEXT: .LBB0_15: ; %Flow6
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[24:25], -1
; CHECK-NEXT: s_and_b64 vcc, exec, s[8:9]
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
; CHECK-NEXT: ; %bb.17: ; %._crit_edge2105.i.i.i2330
; CHECK-NEXT: ; %bb.16: ; %._crit_edge2105.i.i.i2330
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: s_mov_b64 s[24:25], 0
; CHECK-NEXT: global_store_dwordx2 v20, v[20:21], s[12:13]
; CHECK-NEXT: s_branch .LBB0_1
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
; CHECK-NEXT: .LBB0_17: ; %DummyReturnBlock
; CHECK-NEXT: s_endpgm
entry:
br label %._crit_edge1942.i.i.i3548
Expand Down
Loading