Skip to content

Commit

Permalink
[WIP][AMDGPU] Split isInlinableLiteral16 into three and call the sp…
Browse files Browse the repository at this point in the history
…ecific version if possible

The current implementation of `isInlinableLiteral16` assumes, a 16-bit inlinable
literal is either an i16 or a fp16. This is not always true because of bf16.
However, we can't tell fp16 and bf16 apart by just looking at the value. This
patch tries to split `isInlinableLiteral16` into three versions, i16, fp16, bf16
respectively, and call the corresponding version.

This patch is based on llvm#81282. The current status is, only two uses of original
`isInlinableLiteral16` are still there. We need to add an extra argument to indicate
the type of the operand the immediate corresponds to. This will also require the
change of the function signature of the two callers.
  • Loading branch information
shiltian committed Feb 28, 2024
1 parent 7c206c7 commit 5400f6c
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 117 deletions.
48 changes: 37 additions & 11 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1982,8 +1982,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
return isInlinableIntLiteral(Val);
}

// f16/v2f16 operands work correctly for all values.
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
if (VT.getScalarType() == MVT::f16)
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);

assert(VT.getScalarType() == MVT::bf16);

return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {
Expand Down Expand Up @@ -2351,15 +2355,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;

case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
if (isSafeTruncation(Val, 16) &&
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
}

Inst.addOperand(MCOperand::createImm(Val & 0xffff));
setImmKindLiteral();
return;

case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
if (isSafeTruncation(Val, 16) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
Expand All @@ -2386,12 +2401,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;

case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
assert(isSafeTruncation(Val, 16));
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
Inst.addOperand(MCOperand::createImm(Val));
return;
}
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
assert(isSafeTruncation(Val, 16));
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm()));
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm()));

Inst.addOperand(MCOperand::createImm(Val));
return;
Expand Down Expand Up @@ -3535,7 +3555,13 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
return AMDGPU::isInlinableLiteralV2BF16(Val);

return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

llvm_unreachable("invalid operand type");
}
default:
llvm_unreachable("invalid operand size");
Expand Down
16 changes: 8 additions & 8 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,

// This must accept a 32-bit immediate value to correctly handle packed 16-bit
// operations.
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O) {
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O) {
if (Imm == 0x3C00)
O << "1.0";
else if (Imm == 0xBC00)
Expand Down Expand Up @@ -529,17 +529,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}

void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}

uint16_t HImm = static_cast<uint16_t>(Imm);
if (printImmediateFloat16(HImm, STI, O))
if (printImmediateFP16(HImm, STI, O))
return;

uint64_t Imm16 = static_cast<uint16_t>(Imm);
Expand All @@ -566,7 +566,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
if (isUInt<16>(Imm) &&
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
case AMDGPU::OPERAND_REG_IMM_V2BF16:
Expand Down Expand Up @@ -845,7 +845,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
printImmediateF16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateV216(uint32_t Imm, uint8_t OpType,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
Expand Down
24 changes: 18 additions & 6 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15374,16 +15374,28 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
llvm_unreachable("Invalid asm constraint");
}

bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
uint64_t Val,
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
return true;
if (Size == 16) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 && AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi))
return true;
if (VT == MVT::f16 && AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi))
return true;
if (VT == MVT::bf16 && AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi))
return true;
if (VT == MVT::v2i16 && AMDGPU::getInlineEncodingV2I16(Val).has_value())
return true;
if (VT == MVT::v2f16 && AMDGPU::getInlineEncodingV2F16(Val).has_value())
return true;
if (VT == MVT::v2bf16 && AMDGPU::getInlineEncodingV2BF16(Val).has_value())
return true;
}
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
return true;
return false;
}

Expand Down
25 changes: 22 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4121,8 +4121,27 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
ST.hasInv2PiInlineImm());
case 16:
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
}

bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
APInt IntImm = Imm.bitcastToAPInt();
bool HasInv2Pi = ST.hasInv2PiInlineImm();
switch (IntImm.getBitWidth()) {
case 32:
case 64:
return isInlineConstant(IntImm);
case 16:
if (Imm.isIEEE())
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteralFP16(IntImm.getSExtValue(), HasInv2Pi);
else
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteralBF16(IntImm.getSExtValue(), HasInv2Pi);
default:
llvm_unreachable("invalid bitwidth");
}
Expand Down Expand Up @@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
}

return false;
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -982,9 +982,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

bool isInlineConstant(const APInt &Imm) const;

bool isInlineConstant(const APFloat &Imm) const {
return isInlineConstant(Imm.bitcastToAPInt());
}
bool isInlineConstant(const APFloat &Imm) const;

// Returns true if this non-register operand definitely does not need to be
// encoded as a 32-bit literal. Note that this function handles all kinds of
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2657,13 +2657,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3E22; // 1.0 / (2.0 * pi)
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;

if (isInlinableIntLiteral(Literal))
return true;
return Literal == static_cast<int16_t>(0x3e22f983);
}

bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
if (isInlinableIntLiteral(Literal))
return true;
uint16_t Val = static_cast<uint16_t>(Literal);
return Val == 0x3C00 || // 1.0
Val == 0xBC00 || // -1.0
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,13 @@ LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/immv216.ll
Original file line number Diff line number Diff line change
Expand Up @@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
}

; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
Expand All @@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
}

; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y

}

; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
Expand Down
Loading

0 comments on commit 5400f6c

Please sign in to comment.