Skip to content

Commit

Permalink
[AMDGPU] Replace isInlinableLiteral16 with specific version (#81345)
Browse files Browse the repository at this point in the history
  • Loading branch information
shiltian authored Mar 4, 2024
1 parent 6e36ceb commit 530f0e6
Show file tree
Hide file tree
Showing 11 changed files with 230 additions and 119 deletions.
54 changes: 43 additions & 11 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2006,8 +2006,12 @@ static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
return isInlinableIntLiteral(Val);
}

// f16/v2f16 operands work correctly for all values.
return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
if (VT.getScalarType() == MVT::f16)
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);

assert(VT.getScalarType() == MVT::bf16);

return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {
Expand Down Expand Up @@ -2375,15 +2379,26 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;

case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
if (isSafeTruncation(Val, 16) &&
AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val))) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
}

Inst.addOperand(MCOperand::createImm(Val & 0xffff));
setImmKindLiteral();
return;

case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
if (isSafeTruncation(Val, 16) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
setImmKindConst();
return;
Expand All @@ -2410,12 +2425,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
return;

case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: {
assert(isSafeTruncation(Val, 16));
assert(AMDGPU::isInlinableIntLiteral(static_cast<int16_t>(Val)));
Inst.addOperand(MCOperand::createImm(Val));
return;
}
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
assert(isSafeTruncation(Val, 16));
assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm()));
assert(AMDGPU::isInlinableLiteralFP16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm()));

Inst.addOperand(MCOperand::createImm(Val));
return;
Expand Down Expand Up @@ -3559,7 +3579,19 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
return AMDGPU::isInlinableLiteralV2BF16(Val);

return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
if (OperandType == AMDGPU::OPERAND_REG_IMM_FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_C_FP16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_FP16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED)
return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

if (OperandType == AMDGPU::OPERAND_REG_IMM_BF16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_C_BF16 ||
OperandType == AMDGPU::OPERAND_REG_INLINE_AC_BF16 ||
OperandType == AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED)
return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());

llvm_unreachable("invalid operand type");
}
default:
llvm_unreachable("invalid operand size");
Expand Down
18 changes: 8 additions & 10 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,8 @@ void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
}
}

// This must accept a 32-bit immediate value to correctly handle packed 16-bit
// operations.
static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O) {
static bool printImmediateFP16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O) {
if (Imm == 0x3C00)
O << "1.0";
else if (Imm == 0xBC00)
Expand Down Expand Up @@ -529,17 +527,17 @@ void AMDGPUInstPrinter::printImmediateBF16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}

void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
void AMDGPUInstPrinter::printImmediateF16(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
int16_t SImm = static_cast<int16_t>(Imm);
if (isInlinableIntLiteral(SImm)) {
O << SImm;
return;
}

uint16_t HImm = static_cast<uint16_t>(Imm);
if (printImmediateFloat16(HImm, STI, O))
if (printImmediateFP16(HImm, STI, O))
return;

uint64_t Imm16 = static_cast<uint16_t>(Imm);
Expand All @@ -566,7 +564,7 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
if (isUInt<16>(Imm) &&
printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
return;
break;
case AMDGPU::OPERAND_REG_IMM_V2BF16:
Expand Down Expand Up @@ -845,7 +843,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
printImmediateF16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O);
void printImmediateInt16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateBF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateF16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediateV216(uint32_t Imm, uint8_t OpType,
const MCSubtargetInfo &STI, raw_ostream &O);
bool printImmediateFloat32(uint32_t Imm, const MCSubtargetInfo &STI,
Expand Down
28 changes: 22 additions & 6 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15425,16 +15425,32 @@ bool SITargetLowering::checkAsmConstraintVal(SDValue Op, StringRef Constraint,
llvm_unreachable("Invalid asm constraint");
}

bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
uint64_t Val,
bool SITargetLowering::checkAsmConstraintValA(SDValue Op, uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
if (Size == 16) {
MVT VT = Op.getSimpleValueType();
switch (VT.SimpleTy) {
default:
return false;
case MVT::i16:
return AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
case MVT::f16:
return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
case MVT::bf16:
return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
case MVT::v2i16:
return AMDGPU::getInlineEncodingV2I16(Val).has_value();
case MVT::v2f16:
return AMDGPU::getInlineEncodingV2F16(Val).has_value();
case MVT::v2bf16:
return AMDGPU::getInlineEncodingV2BF16(Val).has_value();
}
}
if ((Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi)))
return true;
}
return false;
}

Expand Down
25 changes: 22 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4121,13 +4121,32 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
ST.hasInv2PiInlineImm());
case 16:
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
AMDGPU::isInlinableLiteralI16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
}
}

bool SIInstrInfo::isInlineConstant(const APFloat &Imm) const {
APInt IntImm = Imm.bitcastToAPInt();
int64_t IntImmVal = IntImm.getSExtValue();
bool HasInv2Pi = ST.hasInv2PiInlineImm();
switch (APFloat::SemanticsToEnum(Imm.getSemantics())) {
default:
llvm_unreachable("invalid fltSemantics");
case APFloatBase::S_IEEEsingle:
case APFloatBase::S_IEEEdouble:
return isInlineConstant(IntImm);
case APFloatBase::S_BFloat:
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteralBF16(IntImmVal, HasInv2Pi);
case APFloatBase::S_IEEEhalf:
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteralFP16(IntImmVal, HasInv2Pi);
}
}

bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint8_t OperandType) const {
assert(!MO.isReg() && "isInlineConstant called on register operand!");
Expand Down Expand Up @@ -4200,7 +4219,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
AMDGPU::isInlinableLiteralFP16(Trunc, ST.hasInv2PiInlineImm());
}

return false;
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -984,9 +984,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {

bool isInlineConstant(const APInt &Imm) const;

bool isInlineConstant(const APFloat &Imm) const {
return isInlineConstant(Imm.bitcastToAPInt());
}
bool isInlineConstant(const APFloat &Imm) const;

// Returns true if this non-register operand definitely does not need to be
// encoded as a 32-bit literal. Note that this function handles all kinds of
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2647,13 +2647,19 @@ bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3E22; // 1.0 / (2.0 * pi)
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;

if (isInlinableIntLiteral(Literal))
return true;
return Literal == static_cast<int16_t>(0x3e22f983);
}

bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi) {
if (!HasInv2Pi)
return false;
if (isInlinableIntLiteral(Literal))
return true;
uint16_t Val = static_cast<uint16_t>(Literal);
return Val == 0x3C00 || // 1.0
Val == 0xBC00 || // -1.0
Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,13 @@ LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralI16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/immv216.ll
Original file line number Diff line number Diff line change
Expand Up @@ -577,40 +577,40 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
}

; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
Expand All @@ -635,31 +635,31 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
}

; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y

}

; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
Expand Down
Loading

0 comments on commit 530f0e6

Please sign in to comment.