Skip to content

Commit

Permalink
Revert "[AMDGPU][True16][CodeGen] support v_mov_b16 and v_swap_b16 in…
Browse files Browse the repository at this point in the history
… true16 format (llvm#102198)"

This reverts commit ae059a1.
  • Loading branch information
hanhanW committed Aug 9, 2024
1 parent 7752fec commit 4369eee
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 192 deletions.
10 changes: 1 addition & 9 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1460,15 +1460,7 @@ bool SIFoldOperands::tryFoldFoldableCopy(
return false;
}

MachineOperand *OpToFoldPtr;
if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
// Folding when any src_modifiers are non-zero is unsupported
if (TII->hasAnyModifiersSet(MI))
return false;
OpToFoldPtr = &MI.getOperand(2);
} else
OpToFoldPtr = &MI.getOperand(1);
MachineOperand &OpToFold = *OpToFoldPtr;
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();

// FIXME: We could also be folding things like TargetIndexes.
Expand Down
6 changes: 1 addition & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3369,8 +3369,6 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,

bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B16_t16_e32:
case AMDGPU::V_MOV_B16_t16_e64:
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
case AMDGPU::V_MOV_B64_PSEUDO:
Expand Down Expand Up @@ -5641,9 +5639,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
unsigned RCID = get(MI.getOpcode()).operands()[OpIdx].RegClass;
const TargetRegisterClass *RC = RI.getRegClass(RCID);
unsigned Size = RI.getRegSizeInBits(*RC);
unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
: Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
: AMDGPU::V_MOV_B32_e32;
unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
if (MO.isReg())
Opcode = AMDGPU::COPY;
else if (RI.isSGPRClass(RC))
Expand Down
65 changes: 20 additions & 45 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,6 @@ void SIShrinkInstructions::dropInstructionKeepingImpDefs(
// although requirements match the pass placement and it reduces code size too.
MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
MovT.getOpcode() == AMDGPU::COPY);

Register T = MovT.getOperand(0).getReg();
Expand All @@ -669,12 +668,7 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
Register X = Xop.getReg();
unsigned Xsub = Xop.getSubReg();

unsigned Size = TII->getOpSize(MovT, 0);

// We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
// are not allocatable.
if (Size == 2 && X.isVirtual())
return nullptr;
unsigned Size = TII->getOpSize(MovT, 0) / 4;

if (!TRI->isVGPR(*MRI, X))
return nullptr;
Expand All @@ -690,9 +684,9 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
KilledT = MovY->killsRegister(T, TRI);

if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
MovY->getOpcode() != AMDGPU::COPY) ||
!MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
!MovY->getOperand(1).isReg() ||
MovY->getOperand(1).getReg() != T ||
MovY->getOperand(1).getSubReg() != Tsub)
continue;

Expand Down Expand Up @@ -720,15 +714,14 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
}
if (MovX ||
(I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
I->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
I->getOpcode() != AMDGPU::COPY) ||
I->getOperand(0).getReg() != X ||
I->getOperand(0).getSubReg() != Xsub) {
MovX = nullptr;
break;
}

if (Size > 4 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
continue;

MovX = &*I;
Expand All @@ -737,40 +730,23 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
if (!MovX)
continue;

LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);

MachineBasicBlock &MBB = *MovT.getParent();
SmallVector<MachineInstr *, 4> Swaps;
if (Size == 2) {
for (unsigned I = 0; I < Size; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
X1 = getSubRegForIndex(X, Xsub, I);
Y1 = getSubRegForIndex(Y, Ysub, I);
MachineBasicBlock &MBB = *MovT.getParent();
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
TII->get(AMDGPU::V_SWAP_B16))
.addDef(X)
.addDef(Y)
.addReg(Y)
.addReg(X)
.getInstr();
Swaps.push_back(MIB);
} else {
assert(Size > 0 && Size % 4 == 0);
for (unsigned I = 0; I < Size / 4; ++I) {
TargetInstrInfo::RegSubRegPair X1, Y1;
X1 = getSubRegForIndex(X, Xsub, I);
Y1 = getSubRegForIndex(Y, Ysub, I);
auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
TII->get(AMDGPU::V_SWAP_B32))
.addDef(X1.Reg, 0, X1.SubReg)
.addDef(Y1.Reg, 0, Y1.SubReg)
.addReg(Y1.Reg, 0, Y1.SubReg)
.addReg(X1.Reg, 0, X1.SubReg)
.getInstr();
Swaps.push_back(MIB);
}
}
// Drop implicit EXEC.
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
for (MachineInstr *Swap : Swaps) {
Swap->removeOperand(Swap->getNumExplicitOperands());
Swap->copyImplicitOps(*MBB.getParent(), *MovX);
TII->get(AMDGPU::V_SWAP_B32))
.addDef(X1.Reg, 0, X1.SubReg)
.addDef(Y1.Reg, 0, Y1.SubReg)
.addReg(Y1.Reg, 0, Y1.SubReg)
.addReg(X1.Reg, 0, X1.SubReg).getInstr();
if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
// Drop implicit EXEC.
MIB->removeOperand(MIB->getNumExplicitOperands());
MIB->copyImplicitOps(*MBB.getParent(), *MovX);
}
}
MovX->eraseFromParent();
Expand Down Expand Up @@ -857,7 +833,6 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}

if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
MI.getOpcode() == AMDGPU::COPY)) {
if (auto *NextMI = matchSwap(MI)) {
Next = NextMI->getIterator();
Expand Down Expand Up @@ -1048,7 +1023,7 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::NoVRegs))
continue;

if (ST->useRealTrue16Insts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
!shouldShrinkTrue16(MI))
continue;

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ let SubtargetPredicate = isGFX11Plus in {
let IsInvalidSingleUseConsumer = 1;
let IsInvalidSingleUseProducer = 1;
}
defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>;
defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
Expand Down
28 changes: 20 additions & 8 deletions llvm/test/CodeGen/AMDGPU/bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2131,14 +2131,26 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
; GFX10-NEXT: global_store_short v[2:3], v5, off
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_store_fpimm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v4, 0x3f80
; GFX11-NEXT: v_mov_b32_e32 v5, 0x4228
; GFX11-NEXT: global_store_b16 v[0:1], v4, off
; GFX11-NEXT: global_store_b16 v[2:3], v5, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
; GFX11TRUE16-LABEL: test_store_fpimm:
; GFX11TRUE16: ; %bb.0:
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
; GFX11TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.l
; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, v4.h
; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v5, off
; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v4, off
; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11FAKE16-LABEL: test_store_fpimm:
; GFX11FAKE16: ; %bb.0:
; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
; GFX11FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v4, off
; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
store bfloat 1.0, ptr addrspace(1) %ptr0
store bfloat 42.0, ptr addrspace(1) %ptr1
ret void
Expand Down
16 changes: 12 additions & 4 deletions llvm/test/CodeGen/AMDGPU/fadd.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ define amdgpu_kernel void @fadd_f16_imm_a(
; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x3c00
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -262,7 +264,9 @@ define amdgpu_kernel void @fadd_f16_imm_a(
; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l
; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x3c00
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down Expand Up @@ -386,7 +390,9 @@ define amdgpu_kernel void @fadd_f16_imm_b(
; GFX11-SDAG-NEXT: s_mov_b32 s3, s7
; GFX11-SDAG-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-SDAG-NEXT: v_mov_b16_e32 v0.h, 0x4000
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT: s_nop 0
; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand All @@ -402,7 +408,9 @@ define amdgpu_kernel void @fadd_f16_imm_b(
; GFX11-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX11-GISEL-NEXT: buffer_load_u16 v0, off, s[4:7], 0
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-GISEL-NEXT: v_mov_b16_e32 v0.h, 0x4000
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
Expand Down
110 changes: 0 additions & 110 deletions llvm/test/CodeGen/AMDGPU/v_swap_b16.ll

This file was deleted.

18 changes: 18 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,21 @@ s_load_b96 s[20:22], s[2:3], s0

s_buffer_load_b96 s[20:22], s[4:7], s0
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_mov_b16 v0.l, s0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_mov_b16 v0.l, ttmp0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_mov_b16 v0.l, a0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_mov_b16 v0.l, s0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_mov_b16 v0.l, ttmp0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

v_mov_b16 v0.l, a0.h
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
10 changes: 0 additions & 10 deletions llvm/test/MC/AMDGPU/gfx11_asm_t16_err.s

This file was deleted.

0 comments on commit 4369eee

Please sign in to comment.