diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 44fa3919962c4..287c7bf51ede6 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -412,6 +412,10 @@ class APFloatBase {
   /// format interpretation for llvm.convert.to.arbitrary.fp and
   /// llvm.convert.from.arbitrary.fp intrinsics.
   LLVM_ABI static bool isValidArbitraryFPFormat(StringRef Format);
+
+  /// Returns the fltSemantics for a given arbitrary FP format string, or
+  /// nullptr if the format is unknown or its conversion is not supported yet.
+  LLVM_ABI static const fltSemantics *getArbitraryFPSemantics(StringRef Format);
 };
 
 namespace detail {
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index b8c6788e0bc03..a846aad90bc2b 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1014,6 +1014,12 @@ enum NodeType {
   STRICT_BF16_TO_FP,
   STRICT_FP_TO_BF16,
 
+  /// CONVERT_FROM_ARBITRARY_FP - Converts a value in an arbitrary
+  /// floating-point format, held in an integer, to a native FP type.
+  /// The first operand is the integer containing the source FP bits.
+  /// The second operand is a constant with the source APFloatBase::Semantics.
+  CONVERT_FROM_ARBITRARY_FP,
+
   /// Perform various unary floating-point operations inspired by libm. For
   /// FPOWI, the result is undefined if the integer operand doesn't fit into
   /// sizeof(int).
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index eb20e7982a102..a7aefc64bd500 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3528,6 +3528,290 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(Op);
     break;
   }
+  case ISD::CONVERT_FROM_ARBITRARY_FP: {
+    // Expand conversion from arbitrary FP format stored in an integer to a
+    // native IEEE float type using integer bit manipulation.
+    //
+    // Operand 0 is the integer holding the source bits; operand 1 is a
+    // constant holding the APFloatBase::Semantics value of the source format.
+    //
+    // TODO: currently only conversions from FP4, FP6 and FP8 formats from OCP
+    // specification are expanded. Remaining arbitrary FP types: Float8E4M3,
+    // Float8E3M4, Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ,
+    // Float8E8M0FNU.
+    EVT DstVT = Node->getValueType(0);
+
+    SDValue IntVal = Node->getOperand(0);
+    const uint64_t SemEnum = Node->getConstantOperandVal(1);
+    const auto Sem = static_cast<APFloatBase::Semantics>(SemEnum);
+
+    // Report an unsupported conversion and yield poison so that compilation
+    // can continue after the diagnostic.
+    auto RejectAsUnimplemented = [&](const Twine &Msg) {
+      DAG.getContext()->emitError(Msg);
+      Results.push_back(DAG.getPOISON(DstVT));
+    };
+
+    // Supported source formats.
+    bool IsSupportedSrc = false;
+    switch (Sem) {
+    case APFloatBase::S_Float8E5M2:
+    case APFloatBase::S_Float8E4M3FN:
+    case APFloatBase::S_Float6E3M2FN:
+    case APFloatBase::S_Float6E2M3FN:
+    case APFloatBase::S_Float4E2M1FN:
+      IsSupportedSrc = true;
+      break;
+    default:
+      break;
+    }
+    if (!IsSupportedSrc) {
+      RejectAsUnimplemented("CONVERT_FROM_ARBITRARY_FP: not implemented "
+                            "source format (semantics enum " +
+                            Twine(SemEnum) + ")");
+      break;
+    }
+
+    const fltSemantics &SrcSem = APFloatBase::EnumToSemantics(Sem);
+
+    const unsigned SrcBits = APFloat::getSizeInBits(SrcSem);
+    const unsigned SrcPrecision = APFloat::semanticsPrecision(SrcSem);
+    const unsigned SrcMant = SrcPrecision - 1;
+    const unsigned SrcExp = SrcBits - SrcMant - 1;
+    const int SrcBias = 1 - APFloat::semanticsMinExponent(SrcSem);
+
+    const fltNonfiniteBehavior NFBehavior = SrcSem.nonFiniteBehavior;
+
+    // Destination format parameters.
+    const fltSemantics &DstSem = DstVT.getFltSemantics();
+    const unsigned DstBits = APFloat::getSizeInBits(DstSem);
+
+    // The expansion below assembles the result from 64-bit constants and
+    // assumes a plain sign/exponent/mantissa layout with an implicit leading
+    // bit. Destinations wider than 64 bits (x86_fp80, fp128, ppc_fp128) are
+    // therefore produced by converting to float, which holds every supported
+    // source value exactly, and extending the result.
+    if (DstBits > 64) {
+      if (!TLI.isTypeLegal(MVT::f32)) {
+        RejectAsUnimplemented("CONVERT_FROM_ARBITRARY_FP: not implemented "
+                              "destination type");
+        break;
+      }
+      SDValue AsF32 = DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, dl, MVT::f32,
+                                  IntVal, Node->getOperand(1));
+      Results.push_back(DAG.getNode(ISD::FP_EXTEND, dl, DstVT, AsF32));
+      break;
+    }
+
+    const unsigned DstMant = APFloat::semanticsPrecision(DstSem) - 1;
+    const unsigned DstExpBits = DstBits - DstMant - 1;
+    const int DstMinExp = APFloat::semanticsMinExponent(DstSem);
+    const int DstBias = 1 - DstMinExp;
+    const uint64_t DstExpAllOnes = (1ULL << DstExpBits) - 1;
+    const int BiasAdjust = DstBias - SrcBias;
+
+    // A source denormal whose leading one is mantissa bit P has the unbiased
+    // exponent P + 1 - SrcBias - SrcMant, i.e. the biased destination exponent
+    // P + 1 + BiasAdjust - SrcMant. That is a valid normal exponent (>= 1) for
+    // every P >= 0 only if BiasAdjust >= SrcMant. If instead both formats
+    // share the same bias (Float8E5M2 -> half), source denormals stay denormal
+    // and only need their mantissa widened, which is exactly what the normal
+    // path computes for a zero exponent field.
+    const bool DenormsBecomeNormal = BiasAdjust >= static_cast<int>(SrcMant);
+    if (DstMant < SrcMant || (!DenormsBecomeNormal && BiasAdjust != 0)) {
+      RejectAsUnimplemented("CONVERT_FROM_ARBITRARY_FP: not implemented "
+                            "destination type");
+      break;
+    }
+
+    // Work in an integer type matching the destination float width.
+    // Use zero-extend to preserve the raw bit-pattern.
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), DstBits);
+    SDValue Src = DAG.getZExtOrTrunc(IntVal, dl, IntVT);
+
+    EVT SetCCVT = getSetCCResultType(IntVT);
+
+    SDValue Zero = DAG.getConstant(0, dl, IntVT);
+    SDValue One = DAG.getConstant(1, dl, IntVT);
+
+    // Extract bit fields.
+    const uint64_t MantMask = (SrcMant > 0) ? ((1ULL << SrcMant) - 1) : 0;
+    const uint64_t ExpMask = (1ULL << SrcExp) - 1;
+
+    SDValue MantField = DAG.getNode(ISD::AND, dl, IntVT, Src,
+                                    DAG.getConstant(MantMask, dl, IntVT));
+
+    SDValue ExpField =
+        DAG.getNode(ISD::AND, dl, IntVT,
+                    DAG.getNode(ISD::SRL, dl, IntVT, Src,
+                                DAG.getShiftAmountConstant(SrcMant, IntVT, dl)),
+                    DAG.getConstant(ExpMask, dl, IntVT));
+
+    // Bits above the source sign bit may be set (e.g. after integer
+    // promotion of the operand); the left shift below discards them.
+    SDValue SignBit =
+        DAG.getNode(ISD::SRL, dl, IntVT, Src,
+                    DAG.getShiftAmountConstant(SrcBits - 1, IntVT, dl));
+
+    // Precompute sign shifted to MSB of destination.
+    SDValue SignShifted =
+        DAG.getNode(ISD::SHL, dl, IntVT, SignBit,
+                    DAG.getShiftAmountConstant(DstBits - 1, IntVT, dl));
+
+    // Classify the input value based on compile-time format properties.
+    SDValue ExpAllOnes = DAG.getConstant(ExpMask, dl, IntVT);
+    SDValue IsExpAllOnes =
+        DAG.getSetCC(dl, SetCCVT, ExpField, ExpAllOnes, ISD::SETEQ);
+    SDValue IsExpZero = DAG.getSetCC(dl, SetCCVT, ExpField, Zero, ISD::SETEQ);
+    SDValue IsMantZero = DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETEQ);
+    SDValue IsMantNonZero =
+        DAG.getSetCC(dl, SetCCVT, MantField, Zero, ISD::SETNE);
+
+    // NaN detection.
+    SDValue IsNaN;
+    if (NFBehavior == fltNonfiniteBehavior::FiniteOnly) {
+      // FiniteOnly formats (E2M1FN, E3M2FN, E2M3FN) never produce NaN.
+      IsNaN = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
+    } else if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
+      // E5M2 produces NaN when exp == all-ones AND mantissa != 0.
+      IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantNonZero);
+    } else {
+      // NanOnly + AllOnes (E4M3FN): NaN when all exp and mantissa bits are 1.
+      assert(SrcSem.nanEncoding == fltNanEncoding::AllOnes &&
+             "unexpected NaN encoding for a NanOnly source format");
+      SDValue MantAllOnes = DAG.getConstant(MantMask, dl, IntVT);
+      SDValue IsMantAllOnes =
+          DAG.getSetCC(dl, SetCCVT, MantField, MantAllOnes, ISD::SETEQ);
+      IsNaN = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantAllOnes);
+    }
+
+    // Inf detection.
+    SDValue IsInf;
+    if (NFBehavior == fltNonfiniteBehavior::IEEE754) {
+      // E5M2: Inf when exp == all-ones AND mantissa == 0.
+      IsInf = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpAllOnes, IsMantZero);
+    } else {
+      // NanOnly and FiniteOnly formats have no Inf representation.
+      IsInf = DAG.getBoolConstant(false, dl, SetCCVT, IntVT);
+    }
+
+    // Zero detection.
+    SDValue IsZero = DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantZero);
+
+    // Denorm detection: exp == 0 AND mant != 0.
+    SDValue IsDenorm =
+        DAG.getNode(ISD::AND, dl, SetCCVT, IsExpZero, IsMantNonZero);
+
+    // Normal value conversion.
+    // dst_exp = exp_field + (DstBias - SrcBias)
+    // dst_mant = mant << (DstMant - SrcMant)
+    SDValue NormDstExp = DAG.getNode(
+        ISD::ADD, dl, IntVT, ExpField,
+        DAG.getConstant(APInt(DstBits, BiasAdjust, true), dl, IntVT));
+
+    SDValue NormDstMant;
+    if (DstMant > SrcMant) {
+      SDValue NormDstMantShift =
+          DAG.getShiftAmountConstant(DstMant - SrcMant, IntVT, dl);
+      NormDstMant =
+          DAG.getNode(ISD::SHL, dl, IntVT, MantField, NormDstMantShift);
+    } else {
+      NormDstMant = MantField;
+    }
+
+    // Assemble normal result.
+    SDValue DstMantShift = DAG.getShiftAmountConstant(DstMant, IntVT, dl);
+    SDValue NormExpShifted =
+        DAG.getNode(ISD::SHL, dl, IntVT, NormDstExp, DstMantShift);
+    SDValue NormResult = DAG.getNode(
+        ISD::OR, dl, IntVT,
+        DAG.getNode(ISD::OR, dl, IntVT, SignShifted, NormExpShifted),
+        NormDstMant);
+
+    // Denormal value conversion.
+    // With equal biases the normal path already yields the right denormal
+    // encoding (exponent field 0, widened mantissa), so it is used as is.
+    SDValue FiniteResult = NormResult;
+    if (DenormsBecomeNormal) {
+      // For a denormal source (exp_field == 0, mant != 0), normalize by
+      // finding the MSB position of mant using CTLZ, then compute the correct
+      // exponent and mantissa for the destination format.
+      const unsigned IntVTBits = DstBits;
+      SDValue LeadingZeros =
+          DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, IntVT, MantField);
+
+      // dst_exp_denorm = (IntVTBits + DstBias - SrcBias - SrcMant) -
+      // LeadingZeros
+      const int DenormExpConst =
+          static_cast<int>(IntVTBits) + BiasAdjust - static_cast<int>(SrcMant);
+      SDValue DenormDstExp = DAG.getNode(
+          ISD::SUB, dl, IntVT,
+          DAG.getConstant(APInt(DstBits, DenormExpConst, true), dl, IntVT),
+          LeadingZeros);
+
+      // MSB position of the mantissa (0-indexed from LSB).
+      SDValue MantMSB =
+          DAG.getNode(ISD::SUB, dl, IntVT,
+                      DAG.getConstant(IntVTBits - 1, dl, IntVT), LeadingZeros);
+
+      // leading_one = 1 << MantMSB
+      SDValue LeadingOne = DAG.getNode(ISD::SHL, dl, IntVT, One, MantMSB);
+
+      // frac = mant XOR leading_one (strip the implicit 1)
+      SDValue Frac = DAG.getNode(ISD::XOR, dl, IntVT, MantField, LeadingOne);
+
+      // shift_amount = DstMant - MantMSB
+      //              = DstMant - (IntVTBits - 1 - LeadingZeros)
+      //              = LeadingZeros - (IntVTBits - 1 - DstMant)
+      const unsigned ShiftSub = IntVTBits - 1 - DstMant; // equals DstExpBits
+      SDValue ShiftAmount = DAG.getNode(ISD::SUB, dl, IntVT, LeadingZeros,
+                                        DAG.getConstant(ShiftSub, dl, IntVT));
+
+      SDValue DenormDstMant =
+          DAG.getNode(ISD::SHL, dl, IntVT, Frac, ShiftAmount);
+
+      // Assemble denorm as sign | (denorm_dst_exp << DstMant) | denorm_dst_mant
+      SDValue DenormExpShifted =
+          DAG.getNode(ISD::SHL, dl, IntVT, DenormDstExp, DstMantShift);
+      SDValue DenormResult = DAG.getNode(
+          ISD::OR, dl, IntVT,
+          DAG.getNode(ISD::OR, dl, IntVT, SignShifted, DenormExpShifted),
+          DenormDstMant);
+
+      // Select between normal and denorm paths.
+      FiniteResult =
+          DAG.getSelect(dl, IntVT, IsDenorm, DenormResult, NormResult);
+    }
+
+    // Build special-value results.
+    // NaN -> canonical quiet NaN: sign=0, exp=all-ones, qNaN bit set.
+    // Encoding: (DstExpAllOnes << DstMant) | (1 << (DstMant - 1))
+    const uint64_t QNaNBit = (DstMant > 0) ? (1ULL << (DstMant - 1)) : 0;
+    SDValue NaNResult =
+        DAG.getConstant((DstExpAllOnes << DstMant) | QNaNBit, dl, IntVT);
+
+    // Inf -> destination Inf.
+    // sign | (DstExpAllOnes << DstMant)
+    SDValue InfResult =
+        DAG.getNode(ISD::OR, dl, IntVT, SignShifted,
+                    DAG.getConstant(DstExpAllOnes << DstMant, dl, IntVT));
+
+    // Zero -> signed zero.
+    // Sign bit only.
+    SDValue ZeroResult = SignShifted;
+
+    // Final selection goes in order: NaN takes priority, then Inf, then Zero.
+    SDValue Result = FiniteResult;
+    Result = DAG.getSelect(dl, IntVT, IsZero, ZeroResult, Result);
+    Result = DAG.getSelect(dl, IntVT, IsInf, InfResult, Result);
+    Result = DAG.getSelect(dl, IntVT, IsNaN, NaNResult, Result);
+
+    // Bitcast integer result to destination float type.
+    Result = DAG.getNode(ISD::BITCAST, dl, DstVT, Result);
+
+    Results.push_back(Result);
+    break;
+  }
   case ISD::FCANONICALIZE: {
     // This implements llvm.canonicalize.f* by multiplication with 1.0, as
     // suggested in
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 16453f220bb50..0acb510a9550d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2763,6 +2763,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_UINT_TO_FP:
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:  R = SoftPromoteHalfRes_XINT_TO_FP(N); break;
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
+    R = SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(N);
+    break;
   case ISD::POISON:
   case ISD::UNDEF:       R = SoftPromoteHalfRes_UNDEF(N); break;
   case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
@@ -3050,6 +3053,19 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
 }
 
+SDValue
+DAGTypeLegalizer::SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+  SDLoc dl(N);
+
+  SDValue Res = DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, dl, NVT,
+                            N->getOperand(0), N->getOperand(1));
+
+  // Round the value to the softened type.
+  return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
+}
+
 SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) {
   return DAG.getUNDEF(MVT::i16);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0d5cba405d6e3..85eb59e5449e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2076,6 +2076,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP16_TO_FP:
   case ISD::VP_UINT_TO_FP:
   case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
+    Res = PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(N);
+    break;
   case ISD::STRICT_FP16_TO_FP:
   case ISD::STRICT_UINT_TO_FP:  Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
   case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
@@ -2685,6 +2688,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
                                 ZExtPromotedInteger(N->getOperand(0))), 0);
 }
 
+// The promoted operand is used without zero-extension: the generic expansion
+// masks the exponent and mantissa fields and shifts out everything above the
+// sign bit. NOTE(review): confirm that custom target lowerings do the same.
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+  return SDValue(DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0)),
+                                        N->getOperand(1)),
+                 0);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
   return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
                                 ZExtPromotedInteger(N->getOperand(1))), 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index da592e3cad0f5..a8ffb66a9d911 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -397,6 +397,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteIntOp_TRUNCATE(SDNode *N);
   SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
   SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
+  SDValue PromoteIntOp_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
   SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
   SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
   SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
@@ -787,6 +788,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfRes_FNEG(SDNode *N);
   SDValue SoftPromoteHalfRes_AssertNoFPClass(SDNode *N);
   SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N);
+  SDValue SoftPromoteHalfRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
   SDValue SoftPromoteHalfRes_UNDEF(SDNode *N);
   SDValue SoftPromoteHalfRes_VECREDUCE(SDNode *N);
   SDValue SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N);
@@ -838,6 +840,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+  SDValue ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N);
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 81184f709bd8c..26602c42254c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -456,6 +456,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::USUBO:
   case ISD::SMULO:
   case ISD::UMULO:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
   case ISD::FCANONICALIZE:
   case ISD::FFREXP:
   case ISD::FMODF:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index aeb9d4d7bdc1d..564bf3b7f152e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -62,6 +62,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BUILD_VECTOR:      R = ScalarizeVecRes_BUILD_VECTOR(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::FP_ROUND:          R = ScalarizeVecRes_FP_ROUND(N); break;
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
+    R = ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(N);
+    break;
   case ISD::AssertZext:
   case ISD::AssertSext:
   case ISD::FPOWI:
@@ -478,6 +481,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
                      N->getOperand(1));
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
+  SDLoc DL(N);
+  SDValue Op = N->getOperand(0);
+  EVT OpVT = Op.getValueType();
+  // The result needs scalarizing, but it's not a given that the source does.
+  // See similar logic in ScalarizeVecRes_UnaryOp.
+  if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+    Op = GetScalarizedVector(Op);
+  } else {
+    EVT VT = OpVT.getVectorElementType();
+    Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
+  }
+  return DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, DL,
+                     N->getValueType(0).getVectorElementType(), Op,
+                     N->getOperand(1));
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) {
   SDValue Op = GetScalarizedVector(N->getOperand(0));
   return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
@@ -818,6 +838,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
     break;
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
     Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
     break;
   case ISD::STRICT_SINT_TO_FP:
@@ -1382,6 +1403,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_UINT_TO_FP:
   case ISD::FCANONICALIZE:
   case ISD::AssertNoFPClass:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
     SplitVecRes_UnaryOp(N, Lo, Hi);
     break;
   case ISD::ADDRSPACECAST:
@@ -2783,7 +2805,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
   const SDNodeFlags Flags = N->getFlags();
   unsigned Opcode = N->getOpcode();
   if (N->getNumOperands() <= 2) {
-    if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass) {
+    if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass ||
+        Opcode == ISD::CONVERT_FROM_ARBITRARY_FP) {
       Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
       Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
     } else {
@@ -3596,7 +3619,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     break;
   case ISD::STRICT_FP_ROUND:
   case ISD::VP_FP_ROUND:
-  case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
+  case ISD::FP_ROUND:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
+    Res = SplitVecOp_FP_ROUND(N);
+    break;
   case ISD::FCOPYSIGN:         Res = SplitVecOp_FPOpDifferentTypes(N); break;
   case ISD::STORE:
     Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -4732,8 +4758,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
     Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
     Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
   } else {
-    Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
-    Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+    Lo = DAG.getNode(N->getOpcode(), DL, OutVT, Lo, N->getOperand(1));
+    Hi = DAG.getNode(N->getOpcode(), DL, OutVT, Hi, N->getOperand(1));
   }
 
   return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
@@ -5142,6 +5168,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_UINT_TO_FP:
   case ISD::ZERO_EXTEND:
   case ISD::VP_ZERO_EXTEND:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
     Res = WidenVecRes_Convert(N);
     break;
 
@@ -7278,6 +7305,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::UINT_TO_FP:
   case ISD::STRICT_UINT_TO_FP:
   case ISD::TRUNCATE:
+  case ISD::CONVERT_FROM_ARBITRARY_FP:
     Res = WidenVecOp_Convert(N);
     break;
 
@@ -7499,7 +7527,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
       // use the new one.
       ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
     } else {
-      if (Opcode == ISD::FP_ROUND)
+      if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
         Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
       else
         Res = DAG.getNode(Opcode, dl, WideVT, InOp);
@@ -7523,9 +7551,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
     SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
     ReplaceValueWith(SDValue(N, 1), NewChain);
   } else {
-    for (unsigned i = 0; i < NumElts; ++i)
-      Ops[i] = DAG.getNode(Opcode, dl, EltVT,
-                           DAG.getExtractVectorElt(dl, InEltVT, InOp, i));
+    for (unsigned i = 0; i < NumElts; ++i) {
+      SDValue Elt = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
+      if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
+        Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt, N->getOperand(1));
+      else
+        Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt);
+    }
   }
 
   return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2d20fe5d48517..c116146740fc4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7148,6 +7148,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              DAG.getValueType(VT.getScalarType())));
     return;
   }
+  case Intrinsic::convert_from_arbitrary_fp: {
+    // Extract format metadata and convert to semantics enum.
+    EVT DstVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
+    StringRef FormatStr = cast<MDString>(MD)->getString();
+    const fltSemantics *SrcSem =
+        APFloatBase::getArbitraryFPSemantics(FormatStr);
+    if (!SrcSem) {
+      DAG.getContext()->emitError(
+          "convert_from_arbitrary_fp: not implemented format '" + FormatStr +
+          "'");
+      setValue(&I, DAG.getPOISON(DstVT));
+      return;
+    }
+    APFloatBase::Semantics SemEnum = APFloatBase::SemanticsToEnum(*SrcSem);
+
+    SDValue IntVal = getValue(I.getArgOperand(0));
+
+    // Emit ISD::CONVERT_FROM_ARBITRARY_FP node.
+    SDValue SemConst =
+        DAG.getTargetConstant(static_cast<int>(SemEnum), sdl, MVT::i32);
+    setValue(&I, DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, sdl, DstVT, IntVal,
+                             SemConst));
+    return;
+  }
   case Intrinsic::set_rounding:
     Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
                       {getRoot(), getValue(I.getArgOperand(0))});
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 9453036455727..571830cc57b52 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -435,6 +435,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::STRICT_BF16_TO_FP:          return "strict_bf16_to_fp";
   case ISD::FP_TO_BF16:                 return "fp_to_bf16";
   case ISD::STRICT_FP_TO_BF16:          return "strict_fp_to_bf16";
+  case ISD::CONVERT_FROM_ARBITRARY_FP:  return "convert_from_arbitrary_fp";
   case ISD::LROUND:                     return "lround";
   case ISD::STRICT_LROUND:              return "strict_lround";
   case ISD::LLROUND:                    return "llround";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 17737a59bd768..ffc83a0cdcd91 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1108,7 +1108,7 @@ void TargetLoweringBase::initActions() {
                         ISD::FASIN,         ISD::FATAN,
                         ISD::FCOSH,         ISD::FSINH,
                         ISD::FTANH,         ISD::FATAN2,
-                        ISD::FMULADD},
+                        ISD::FMULADD,       ISD::CONVERT_FROM_ARBITRARY_FP},
                        VT, Expand);
 
     // Overflow operations default to expand
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index b359c680ab673..36d1ee5ad9059 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Error.h"
@@ -6085,6 +6086,18 @@ bool APFloatBase::isValidArbitraryFPFormat(StringRef Format) {
   return llvm::is_contained(ValidFormats, Format);
 }
 
+const fltSemantics *APFloatBase::getArbitraryFPSemantics(StringRef Format) {
+  // TODO: extend to remaining arbitrary FP types: Float8E4M3, Float8E3M4,
+  // Float8E5M2FNUZ, Float8E4M3FNUZ, Float8E4M3B11FNUZ, Float8E8M0FNU.
+  return StringSwitch<const fltSemantics *>(Format)
+      .Case("Float8E5M2", &semFloat8E5M2)
+      .Case("Float8E4M3FN", &semFloat8E4M3FN)
+      .Case("Float4E2M1FN", &semFloat4E2M1FN)
+      .Case("Float6E3M2FN", &semFloat6E3M2FN)
+      .Case("Float6E2M3FN", &semFloat6E2M3FN)
+      .Default(nullptr);
+}
+
 APFloat::Storage::~Storage() {
   if (usesLayout<IEEEFloat>(*semantics)) {
     IEEE.~IEEEFloat();
diff --git a/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
new file mode 100644
index 0000000000000..e7bb8825fec05
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/arbitrary-fp-to-float.ll
@@ -0,0 +1,646 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck %s
+
+; Test llvm.convert.from.arbitrary.fp intrinsic expansion.
+
+declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata)
+declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata)
+declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata)
+
+declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata)
+declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata)
+
+; Float8E5M2
+; Layout: sign(1) exp(5) mant(2), bias=15
+; Supports: Inf, NaN, signed zero, denormals
+
+; Float8E5M2 normal: 0_01111_00 = 1.0
+define float @from_f8e5m2_normal() {
+; CHECK-LABEL: from_f8e5m2_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 zero: 0_00000_00 = +0.0
+define float @from_f8e5m2_zero() {
+; CHECK-LABEL: from_f8e5m2_zero:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 negative zero: 1_00000_00 = -0.0 (only the sign bit is set)
+define float @from_f8e5m2_neg_zero() {
+; CHECK-LABEL: from_f8e5m2_neg_zero:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_bfrev_b32_e32 v0, 1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 denorm: 0_00000_01 = 2^(-16), the smallest positive value
+define float @from_f8e5m2_denorm() {
+; CHECK-LABEL: from_f8e5m2_denorm:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x37800000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 +Inf: 0_11111_00
+define float @from_f8e5m2_inf() {
+; CHECK-LABEL: from_f8e5m2_inf:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x7f800000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 NaN: 0_11111_01 (converted to the canonical quiet NaN)
+define float @from_f8e5m2_nan() {
+; CHECK-LABEL: from_f8e5m2_nan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x7fc00000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 max: 0_11110_11 = 57344 (largest finite value)
+define float @from_f8e5m2_max() {
+; CHECK-LABEL: from_f8e5m2_max:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x47600000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 negative: 1_01111_00 = -1.0
+define float @from_f8e5m2_neg() {
+; CHECK-LABEL: from_f8e5m2_neg:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, -1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E5M2 runtime argument: checks the full, non-constant-folded expansion
+define float @from_f8e5m2_dynamic(i8 %x) {
+; CHECK-LABEL: from_f8e5m2_dynamic:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT:    v_and_b32_e32 v1, 3, v0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 24, v0
+; CHECK-NEXT:    v_bfe_u32 v0, v0, 2, 5
+; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 21, v1
+; CHECK-NEXT:    v_and_b32_e32 v3, 0x80000000, v3
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 23, v0
+; CHECK-NEXT:    v_or3_b32 v2, v4, v3, v2
+; CHECK-NEXT:    v_ffbh_u32_e32 v4, v1
+; CHECK-NEXT:    v_sub_u32_e32 v5, 31, v4
+; CHECK-NEXT:    v_lshlrev_b32_e64 v5, v5, 1
+; CHECK-NEXT:    v_xor_b32_e32 v5, v1, v5
+; CHECK-NEXT:    v_add_u32_e32 v6, -8, v4
+; CHECK-NEXT:    v_sub_u32_e32 v4, 0x8e, v4
+; CHECK-NEXT:    v_lshlrev_b32_e32 v5, v6, v5
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 23, v4
+; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v0
+; CHECK-NEXT:    v_add_u32_e32 v2, 0x38000000, v2
+; CHECK-NEXT:    v_or3_b32 v4, v3, v4, v5
+; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v4, s[4:5]
+; CHECK-NEXT:    v_or_b32_e32 v4, v0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[4:5]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], 31, v0
+; CHECK-NEXT:    v_or_b32_e32 v0, 0x7f800000, v3
+; CHECK-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
+; CHECK-NEXT:    s_and_b64 vcc, s[6:7], vcc
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2")
+  ret float %r
+}
+
+; Float8E4M3FN (NanOnly, NanEncoding=AllOnes)
+; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7
+; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite.
+
+; Float8E4M3FN normal: 0_0111_000 = 1.0
+define float @from_f8e4m3fn_normal() {
+; CHECK-LABEL: from_f8e4m3fn_normal:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN")
+  ret float %r
+}
+
+; Float8E4M3FN NaN: 0_1111_111 (converted to the canonical quiet NaN)
+define float @from_f8e4m3fn_nan() {
+; CHECK-LABEL: from_f8e4m3fn_nan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x7fc00000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN")
+  ret float %r
+}
+
+; Float8E4M3FN max (not NaN): 0_1111_110 = 448
+; Despite exp=all-ones, this is a valid finite number (max value)
+define float @from_f8e4m3fn_max() {
+; CHECK-LABEL: from_f8e4m3fn_max:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x43e00000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN")
+  ret float %r
+}
+
+; Float8E4M3FN not-NaN: 0_1111_101 = 416
+; exp=all-ones but mant!=all-ones, so this is finite
+define float @from_f8e4m3fn_not_nan() {
+; CHECK-LABEL: from_f8e4m3fn_not_nan:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0x43d00000
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN")
+  ret float %r
+}
+
+; Float8E4M3FN zero: 0_0000_000 = +0.0
+define float @from_f8e4m3fn_zero() {
+; CHECK-LABEL: from_f8e4m3fn_zero:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN")
+  ret float %r
+}
+ +; Float8E4M3FN denorm: 0_0000_001 = 2^(-9) +define float @from_f8e4m3fn_denorm() { +; CHECK-LABEL: from_f8e4m3fn_denorm: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x3b000000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN runtime arg test +define float @from_f8e4m3fn_dynamic(i8 %x) { +; CHECK-LABEL: from_f8e4m3fn_dynamic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; CHECK-NEXT: v_and_b32_e32 v1, 7, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 24, v0 +; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 4 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1 +; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0 +; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2 +; CHECK-NEXT: v_ffbh_u32_e32 v4, v1 +; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4 +; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1 +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5 +; CHECK-NEXT: v_add_u32_e32 v6, -8, v4 +; CHECK-NEXT: v_sub_u32_e32 v4, 0x95, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; CHECK-NEXT: v_add_u32_e32 v2, 0x3c000000, v2 +; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5 +; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_or_b32_e32 v4, v0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 7, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 15, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0x7fc00000 +; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN") 
+ ret float %r +} + +; Float6E3M2FN (FiniteOnly) +; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4 +; No Inf, no NaN. All bit patterns are finite. + +; Float6E3M2FN normal: 0_011_00 = 1.0 +define float @from_f6e3m2fn_normal() { +; CHECK-LABEL: from_f6e3m2fn_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN max: 0_111_11 = 28.0 +define float @from_f6e3m2fn_max() { +; CHECK-LABEL: from_f6e3m2fn_max: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x41e00000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN denorm: 0_000_01 = 0.0625 +define float @from_f6e3m2fn_denorm() { +; CHECK-LABEL: from_f6e3m2fn_denorm: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x3d800000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN zero: 0_000_00 = +0.0 +define float @from_f6e3m2fn_zero() { +; CHECK-LABEL: from_f6e3m2fn_zero: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN negative: 1_011_00 = -1.0 +define float @from_f6e3m2fn_neg() { +; CHECK-LABEL: from_f6e3m2fn_neg: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, -1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN") 
+ ret float %r +} + +; Float6E3M2FN runtime arg test +define float @from_f6e3m2fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e3m2fn_dynamic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; CHECK-NEXT: v_and_b32_e32 v1, 3, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0 +; CHECK-NEXT: v_bfe_u32 v0, v0, 2, 3 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 21, v1 +; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0 +; CHECK-NEXT: v_or3_b32 v2, v4, v3, v2 +; CHECK-NEXT: v_ffbh_u32_e32 v4, v1 +; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4 +; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1 +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5 +; CHECK-NEXT: v_add_u32_e32 v6, -8, v4 +; CHECK-NEXT: v_sub_u32_e32 v4, 0x9a, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; CHECK-NEXT: v_add_u32_e32 v2, 0x3e000000, v2 +; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5 +; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc +; CHECK-NEXT: v_or_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E2M3FN (FiniteOnly) +; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2 +; No Inf, no NaN. All bit patterns are finite. 
+ +; Float6E2M3FN normal: 0_01_000 = 1.0 +define float @from_f6e2m3fn_normal() { +; CHECK-LABEL: from_f6e2m3fn_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN max: 0_11_111 = 7.5 +define float @from_f6e2m3fn_max() { +; CHECK-LABEL: from_f6e2m3fn_max: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x40f00000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN denorm: 0_00_001 = 0.125 +define float @from_f6e2m3fn_denorm() { +; CHECK-LABEL: from_f6e2m3fn_denorm: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x3e000000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN zero: 0_00_000 = +0.0 +define float @from_f6e2m3fn_zero() { +; CHECK-LABEL: from_f6e2m3fn_zero: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN runtime arg test +define float @from_f6e2m3fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e2m3fn_dynamic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; CHECK-NEXT: v_and_b32_e32 v1, 7, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 26, v0 +; CHECK-NEXT: v_bfe_u32 v0, v0, 3, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v1 +; CHECK-NEXT: v_and_b32_e32 v3, 0x80000000, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v0 +; 
CHECK-NEXT: v_or3_b32 v2, v4, v3, v2 +; CHECK-NEXT: v_ffbh_u32_e32 v4, v1 +; CHECK-NEXT: v_sub_u32_e32 v5, 31, v4 +; CHECK-NEXT: v_lshlrev_b32_e64 v5, v5, 1 +; CHECK-NEXT: v_xor_b32_e32 v5, v1, v5 +; CHECK-NEXT: v_add_u32_e32 v6, -8, v4 +; CHECK-NEXT: v_sub_u32_e32 v4, 0x9b, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v5, v6, v5 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 23, v4 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 +; CHECK-NEXT: v_add_u32_e32 v2, 0.5, v2 +; CHECK-NEXT: v_or3_b32 v4, v3, v4, v5 +; CHECK-NEXT: s_and_b64 vcc, s[4:5], vcc +; CHECK-NEXT: v_or_b32_e32 v0, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN") + ret float %r +} + +; Float4E2M1FN (FiniteOnly) +; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2 +; No Inf, no NaN. 
+ +; Float4E2M1FN normal: 0_01_0 = 1.0 +define float @from_f4e2m1fn_normal() { +; CHECK-LABEL: from_f4e2m1fn_normal: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 1.0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN denorm: 0_00_1 = 0.5 +define float @from_f4e2m1fn_denorm() { +; CHECK-LABEL: from_f4e2m1fn_denorm: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0.5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN max: 0_11_1 = 6.0 +define float @from_f4e2m1fn_max() { +; CHECK-LABEL: from_f4e2m1fn_max: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x40c00000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN runtime arg test +define float @from_f4e2m1fn_dynamic(i4 %x) { +; CHECK-LABEL: from_f4e2m1fn_dynamic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; CHECK-NEXT: v_and_b32_e32 v2, 1, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v4, 28, v1 +; CHECK-NEXT: v_bfe_u32 v1, v1, 1, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 22, v2 +; CHECK-NEXT: v_and_b32_e32 v4, 0x80000000, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v5, 23, v1 +; CHECK-NEXT: v_or3_b32 v3, v5, v4, v3 +; CHECK-NEXT: v_ffbh_u32_e32 v5, v2 +; CHECK-NEXT: v_sub_u32_e32 v6, 31, v5 +; CHECK-NEXT: v_lshlrev_b32_e64 v6, v6, 1 +; CHECK-NEXT: v_xor_b32_e32 v6, v2, v6 +; CHECK-NEXT: v_add_u32_e32 v7, -8, v5 +; CHECK-NEXT: v_sub_u32_e32 v5, 0x9d, v5 +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v6, v7, v6 +; CHECK-NEXT: 
v_lshlrev_b32_e32 v5, 23, v5 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 +; CHECK-NEXT: v_add_u32_e32 v3, 0.5, v3 +; CHECK-NEXT: v_or3_b32 v5, v4, v5, v6 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_or_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN") + ret float %r +} + +; Float8E5M2 to f16: 1.0 +define half @from_f8e5m2_to_f16() { +; CHECK-LABEL: from_f8e5m2_to_f16: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2") + ret half %r +} + +; Float8E5M2 to f64: 1.0 +define double @from_f8e5m2_to_f64() { +; CHECK-LABEL: from_f8e5m2_to_f64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0x3ff00000 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2") + ret double %r +} + +; Vector test: Float4E2M1FN <4 x i4> -> <4 x float> +define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) { +; CHECK-LABEL: fp4_to_f32_vec: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v0 +; CHECK-NEXT: v_and_b32_e32 v5, 1, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4 +; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5 +; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4 +; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6 +; CHECK-NEXT: v_ffbh_u32_e32 v8, v5 +; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8 +; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1 +; 
CHECK-NEXT: v_xor_b32_e32 v9, v5, v9 +; CHECK-NEXT: v_add_u32_e32 v10, -8, v8 +; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8 +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 +; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6 +; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_or_b32_e32 v4, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; CHECK-NEXT: v_and_b32_e32 v5, 1, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4 +; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5 +; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4 +; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6 +; CHECK-NEXT: v_ffbh_u32_e32 v8, v5 +; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8 +; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1 +; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9 +; CHECK-NEXT: v_add_u32_e32 v10, -8, v8 +; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8 +; CHECK-NEXT: v_and_b32_e32 v1, 1, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1 +; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6 +; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_or_b32_e32 v4, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; CHECK-NEXT: v_and_b32_e32 v5, 1, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4 +; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5 +; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, 
v7 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4 +; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6 +; CHECK-NEXT: v_ffbh_u32_e32 v8, v5 +; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8 +; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1 +; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9 +; CHECK-NEXT: v_add_u32_e32 v10, -8, v8 +; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8 +; CHECK-NEXT: v_and_b32_e32 v2, 1, v2 +; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2 +; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6 +; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_or_b32_e32 v4, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_and_b32_e32 v4, 0xffff, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; CHECK-NEXT: v_and_b32_e32 v5, 1, v4 +; CHECK-NEXT: v_lshlrev_b32_e32 v7, 28, v4 +; CHECK-NEXT: v_bfe_u32 v4, v4, 1, 2 +; CHECK-NEXT: v_lshlrev_b32_e32 v6, 22, v5 +; CHECK-NEXT: v_and_b32_e32 v7, 0x80000000, v7 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v4 +; CHECK-NEXT: v_or3_b32 v6, v8, v7, v6 +; CHECK-NEXT: v_ffbh_u32_e32 v8, v5 +; CHECK-NEXT: v_sub_u32_e32 v9, 31, v8 +; CHECK-NEXT: v_lshlrev_b32_e64 v9, v9, 1 +; CHECK-NEXT: v_xor_b32_e32 v9, v5, v9 +; CHECK-NEXT: v_add_u32_e32 v10, -8, v8 +; CHECK-NEXT: v_sub_u32_e32 v8, 0x9d, v8 +; CHECK-NEXT: v_and_b32_e32 v3, 1, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v9, v10, v9 +; CHECK-NEXT: v_lshlrev_b32_e32 v8, 23, v8 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v3 +; CHECK-NEXT: v_add_u32_e32 v6, 0.5, v6 +; CHECK-NEXT: v_or3_b32 v8, v7, v8, v9 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_or_b32_e32 v4, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %r = 
call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN") + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll new file mode 100644 index 0000000000000..aff1bd385308d --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/arbitrary-fp-to-float.ll @@ -0,0 +1,761 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s + +; Test llvm.convert.from.arbitrary.fp intrinsic expansion. + +declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata) +declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata) +declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata) +declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata) + +declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata) +declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata) + +; Float8E5M2 +; Layout: sign(1) exp(5) mant(2), bias=15 +; Supports: Inf, NaN, signed zero, denormals + +; Float8E5M2 normal: 0_01111_00 = 1.0 +define float @from_f8e5m2_normal() { +; CHECK-LABEL: from_f8e5m2_normal( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 zero: 0_00000_00 = +0.0 +define float @from_f8e5m2_zero() { +; CHECK-LABEL: from_f8e5m2_zero( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 0; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 negative zero: 1_00000_00 = -0.0 +define float @from_f8e5m2_neg_zero() { +; CHECK-LABEL: from_f8e5m2_neg_zero( +; CHECK: { +;
CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], -2147483648; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 denorm: 0_00000_01 = 2^(-16) +define float @from_f8e5m2_denorm() { +; CHECK-LABEL: from_f8e5m2_denorm( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 931135488; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 +Inf: 0_11111_00 +define float @from_f8e5m2_inf() { +; CHECK-LABEL: from_f8e5m2_inf( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 2139095040; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 NaN: 0_11111_01 +define float @from_f8e5m2_nan() { +; CHECK-LABEL: from_f8e5m2_nan( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 max: 0_11110_11 = 57344 +define float @from_f8e5m2_max() { +; CHECK-LABEL: from_f8e5m2_max( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1197473792; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 negative: 1_01111_00 = -1.0 +define float @from_f8e5m2_neg() { +; CHECK-LABEL: from_f8e5m2_neg( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], -1082130432; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, 
metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 runtime arg test +define float @from_f8e5m2_dynamic(i8 %x) { +; CHECK-LABEL: from_f8e5m2_dynamic( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<6>; +; CHECK-NEXT: .reg .b32 %r<31>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %r1, [from_f8e5m2_dynamic_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 24; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: and.b32 %r4, %r1, 3; +; CHECK-NEXT: clz.b32 %r5, %r4; +; CHECK-NEXT: sub.s32 %r6, 142, %r5; +; CHECK-NEXT: shl.b32 %r7, %r6, 23; +; CHECK-NEXT: or.b32 %r8, %r3, %r7; +; CHECK-NEXT: sub.s32 %r9, 31, %r5; +; CHECK-NEXT: mov.b32 %r10, 1; +; CHECK-NEXT: shl.b32 %r11, %r10, %r9; +; CHECK-NEXT: xor.b32 %r12, %r4, %r11; +; CHECK-NEXT: add.s32 %r13, %r5, -8; +; CHECK-NEXT: shl.b32 %r14, %r12, %r13; +; CHECK-NEXT: or.b32 %r15, %r8, %r14; +; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 5; +; CHECK-NEXT: shl.b32 %r17, %r16, 23; +; CHECK-NEXT: or.b32 %r18, %r17, %r3; +; CHECK-NEXT: shl.b32 %r19, %r4, 21; +; CHECK-NEXT: or.b32 %r20, %r18, %r19; +; CHECK-NEXT: add.s32 %r21, %r20, 939524096; +; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0; +; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0; +; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2; +; CHECK-NEXT: or.b32 %r24, %r16, %r4; +; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0; +; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3; +; CHECK-NEXT: setp.eq.b32 %p4, %r4, 0; +; CHECK-NEXT: or.b32 %r26, %r3, 2139095040; +; CHECK-NEXT: selp.b32 %r27, %r26, %r25, %p4; +; CHECK-NEXT: setp.eq.b32 %p5, %r16, 31; +; CHECK-NEXT: selp.b32 %r28, %r27, %r25, %p5; +; CHECK-NEXT: selp.b32 %r29, 2143289344, %r28, %p1; +; CHECK-NEXT: selp.b32 %r30, %r29, %r28, %p5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r30; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2") + ret float %r +} + +; Float8E4M3FN (NanOnly, NanEncoding=AllOnes) +; Layout: sign(1) exp(4) mant(3), 
maxExp=8, minExp=-6, bias=7 +; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite. + +; Float8E4M3FN normal: 0_0111_000 = 1.0 +define float @from_f8e4m3fn_normal() { +; CHECK-LABEL: from_f8e4m3fn_normal( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN NaN: 0_1111_111 +define float @from_f8e4m3fn_nan() { +; CHECK-LABEL: from_f8e4m3fn_nan( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 2143289344; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN not-NaN: 0_1111_110 = 448 +; Despite exp=all-ones, this is a valid finite number (max value) +define float @from_f8e4m3fn_max() { +; CHECK-LABEL: from_f8e4m3fn_max( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1138753536; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN not-NaN: 0_1111_101 = 416 +; exp=all-ones but mant!=all-ones so this is finite +define float @from_f8e4m3fn_not_nan() { +; CHECK-LABEL: from_f8e4m3fn_not_nan( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1137704960; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN zero: 0_0000_000 = +0.0 +define float @from_f8e4m3fn_zero() { +; CHECK-LABEL: from_f8e4m3fn_zero( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 0; +; CHECK-NEXT: ret; + %r = call float 
@llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN denorm: 0_0000_001 = 2^(-9) +define float @from_f8e4m3fn_denorm() { +; CHECK-LABEL: from_f8e4m3fn_denorm( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 989855744; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN runtime arg test +define float @from_f8e4m3fn_dynamic(i8 %x) { +; CHECK-LABEL: from_f8e4m3fn_dynamic( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<6>; +; CHECK-NEXT: .reg .b32 %r<28>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %r1, [from_f8e4m3fn_dynamic_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 24; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: and.b32 %r4, %r1, 7; +; CHECK-NEXT: clz.b32 %r5, %r4; +; CHECK-NEXT: sub.s32 %r6, 149, %r5; +; CHECK-NEXT: shl.b32 %r7, %r6, 23; +; CHECK-NEXT: or.b32 %r8, %r3, %r7; +; CHECK-NEXT: sub.s32 %r9, 31, %r5; +; CHECK-NEXT: mov.b32 %r10, 1; +; CHECK-NEXT: shl.b32 %r11, %r10, %r9; +; CHECK-NEXT: xor.b32 %r12, %r4, %r11; +; CHECK-NEXT: add.s32 %r13, %r5, -8; +; CHECK-NEXT: shl.b32 %r14, %r12, %r13; +; CHECK-NEXT: or.b32 %r15, %r8, %r14; +; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 4; +; CHECK-NEXT: shl.b32 %r17, %r16, 23; +; CHECK-NEXT: or.b32 %r18, %r17, %r3; +; CHECK-NEXT: shl.b32 %r19, %r4, 20; +; CHECK-NEXT: or.b32 %r20, %r18, %r19; +; CHECK-NEXT: add.s32 %r21, %r20, 1006632960; +; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0; +; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0; +; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2; +; CHECK-NEXT: or.b32 %r24, %r16, %r4; +; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0; +; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3; +; CHECK-NEXT: setp.eq.b32 %p4, %r4, 7; +; CHECK-NEXT: selp.b32 %r26, 2143289344, %r25, %p4; +; CHECK-NEXT: setp.eq.b32 %p5, %r16, 15; +; CHECK-NEXT: 
selp.b32 %r27, %r26, %r25, %p5; +; CHECK-NEXT: st.param.b32 [func_retval0], %r27; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN") + ret float %r +} + +; Float6E3M2FN (FiniteOnly) +; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4 +; No Inf, no NaN. All bit patterns are finite. + +; Float6E3M2FN normal: 0_011_00 = 1.0 +define float @from_f6e3m2fn_normal() { +; CHECK-LABEL: from_f6e3m2fn_normal( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN max: 0_111_11 = 28.0 +define float @from_f6e3m2fn_max() { +; CHECK-LABEL: from_f6e3m2fn_max( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1105199104; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN denorm: 0_000_01 = 0.0625 +define float @from_f6e3m2fn_denorm() { +; CHECK-LABEL: from_f6e3m2fn_denorm( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1031798784; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN zero: 0_000_00 = +0.0 +define float @from_f6e3m2fn_zero() { +; CHECK-LABEL: from_f6e3m2fn_zero( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 0; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN negative: 1_011_00 = -1.0 +define float @from_f6e3m2fn_neg() { +; CHECK-LABEL: from_f6e3m2fn_neg( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; 
CHECK-NEXT: st.param.b32 [func_retval0], -1082130432; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN runtime arg test +define float @from_f6e3m2fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e3m2fn_dynamic( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b16 %rs<5>; +; CHECK-NEXT: .reg .b32 %r<26>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e3m2fn_dynamic_param_0+1]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 8; +; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e3m2fn_dynamic_param_0]; +; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: shl.b32 %r2, %r1, 26; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: and.b32 %r4, %r1, 3; +; CHECK-NEXT: clz.b32 %r5, %r4; +; CHECK-NEXT: sub.s32 %r6, 154, %r5; +; CHECK-NEXT: shl.b32 %r7, %r6, 23; +; CHECK-NEXT: or.b32 %r8, %r3, %r7; +; CHECK-NEXT: sub.s32 %r9, 31, %r5; +; CHECK-NEXT: mov.b32 %r10, 1; +; CHECK-NEXT: shl.b32 %r11, %r10, %r9; +; CHECK-NEXT: xor.b32 %r12, %r4, %r11; +; CHECK-NEXT: add.s32 %r13, %r5, -8; +; CHECK-NEXT: shl.b32 %r14, %r12, %r13; +; CHECK-NEXT: or.b32 %r15, %r8, %r14; +; CHECK-NEXT: bfe.u32 %r16, %r1, 2, 3; +; CHECK-NEXT: shl.b32 %r17, %r16, 23; +; CHECK-NEXT: or.b32 %r18, %r17, %r3; +; CHECK-NEXT: shl.b32 %r19, %r4, 21; +; CHECK-NEXT: or.b32 %r20, %r18, %r19; +; CHECK-NEXT: add.s32 %r21, %r20, 1040187392; +; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0; +; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0; +; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2; +; CHECK-NEXT: or.b32 %r24, %r16, %r4; +; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0; +; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r25; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E2M3FN (FiniteOnly) +; Layout: 
sign(1) exp(2) mant(3), bias=1, maxExp=2 +; No Inf, no NaN. All bit patterns are finite. + +; Float6E2M3FN normal: 0_01_000 = 1.0 +define float @from_f6e2m3fn_normal() { +; CHECK-LABEL: from_f6e2m3fn_normal( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN max: 0_11_111 = 7.5 +define float @from_f6e2m3fn_max() { +; CHECK-LABEL: from_f6e2m3fn_max( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1089470464; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN denorm: 0_00_001 = 0.125 +define float @from_f6e2m3fn_denorm() { +; CHECK-LABEL: from_f6e2m3fn_denorm( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1040187392; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN zero: 0_00_000 = +0.0 +define float @from_f6e2m3fn_zero() { +; CHECK-LABEL: from_f6e2m3fn_zero( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 0; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN runtime arg test +define float @from_f6e2m3fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e2m3fn_dynamic( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b16 %rs<5>; +; CHECK-NEXT: .reg .b32 %r<26>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [from_f6e2m3fn_dynamic_param_0+1]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 8; +; CHECK-NEXT: ld.param.b8 %rs3, [from_f6e2m3fn_dynamic_param_0]; +; 
CHECK-NEXT: or.b16 %rs4, %rs2, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: shl.b32 %r2, %r1, 26; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: and.b32 %r4, %r1, 7; +; CHECK-NEXT: clz.b32 %r5, %r4; +; CHECK-NEXT: sub.s32 %r6, 155, %r5; +; CHECK-NEXT: shl.b32 %r7, %r6, 23; +; CHECK-NEXT: or.b32 %r8, %r3, %r7; +; CHECK-NEXT: sub.s32 %r9, 31, %r5; +; CHECK-NEXT: mov.b32 %r10, 1; +; CHECK-NEXT: shl.b32 %r11, %r10, %r9; +; CHECK-NEXT: xor.b32 %r12, %r4, %r11; +; CHECK-NEXT: add.s32 %r13, %r5, -8; +; CHECK-NEXT: shl.b32 %r14, %r12, %r13; +; CHECK-NEXT: or.b32 %r15, %r8, %r14; +; CHECK-NEXT: bfe.u32 %r16, %r1, 3, 2; +; CHECK-NEXT: shl.b32 %r17, %r16, 23; +; CHECK-NEXT: or.b32 %r18, %r17, %r3; +; CHECK-NEXT: shl.b32 %r19, %r4, 20; +; CHECK-NEXT: or.b32 %r20, %r18, %r19; +; CHECK-NEXT: add.s32 %r21, %r20, 1056964608; +; CHECK-NEXT: setp.ne.b32 %p1, %r4, 0; +; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0; +; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2; +; CHECK-NEXT: or.b32 %r24, %r16, %r4; +; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0; +; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r25; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN") + ret float %r +} + +; Float4E2M1FN (FiniteOnly) +; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2 +; No Inf, no NaN. 
+ +; Float4E2M1FN normal: 0_01_0 = 1.0 +define float @from_f4e2m1fn_normal() { +; CHECK-LABEL: from_f4e2m1fn_normal( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1065353216; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN denorm: 0_00_1 = 0.5 +define float @from_f4e2m1fn_denorm() { +; CHECK-LABEL: from_f4e2m1fn_denorm( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1056964608; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN max: 0_11_1 = 6.0 +define float @from_f4e2m1fn_max() { +; CHECK-LABEL: from_f4e2m1fn_max( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b32 [func_retval0], 1086324736; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN runtime arg test +define float @from_f4e2m1fn_dynamic(i4 %x) { +; CHECK-LABEL: from_f4e2m1fn_dynamic( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b16 %rs<6>; +; CHECK-NEXT: .reg .b32 %r<26>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [from_f4e2m1fn_dynamic_param_0+1]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 8; +; CHECK-NEXT: ld.param.b8 %rs3, [from_f4e2m1fn_dynamic_param_0]; +; CHECK-NEXT: or.b16 %rs4, %rs2, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs4; +; CHECK-NEXT: shl.b32 %r2, %r1, 28; +; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; +; CHECK-NEXT: and.b32 %r4, %r1, 1; +; CHECK-NEXT: clz.b32 %r5, %r4; +; CHECK-NEXT: sub.s32 %r6, 157, %r5; +; CHECK-NEXT: shl.b32 %r7, %r6, 23; +; CHECK-NEXT: or.b32 %r8, %r3, %r7; +; CHECK-NEXT: sub.s32 %r9, 31, %r5; +; CHECK-NEXT: mov.b32 %r10, 1; +; CHECK-NEXT: shl.b32 %r11, %r10, %r9; +; 
CHECK-NEXT: xor.b32 %r12, %r4, %r11; +; CHECK-NEXT: add.s32 %r13, %r5, -8; +; CHECK-NEXT: shl.b32 %r14, %r12, %r13; +; CHECK-NEXT: or.b32 %r15, %r8, %r14; +; CHECK-NEXT: bfe.u32 %r16, %r1, 1, 2; +; CHECK-NEXT: shl.b32 %r17, %r16, 23; +; CHECK-NEXT: or.b32 %r18, %r17, %r3; +; CHECK-NEXT: shl.b32 %r19, %r4, 22; +; CHECK-NEXT: or.b32 %r20, %r18, %r19; +; CHECK-NEXT: add.s32 %r21, %r20, 1056964608; +; CHECK-NEXT: and.b16 %rs5, %rs3, 1; +; CHECK-NEXT: setp.ne.b16 %p1, %rs5, 0; +; CHECK-NEXT: selp.b32 %r22, %r15, %r21, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r16, 0; +; CHECK-NEXT: selp.b32 %r23, %r22, %r21, %p2; +; CHECK-NEXT: or.b32 %r24, %r16, %r4; +; CHECK-NEXT: setp.eq.b32 %p3, %r24, 0; +; CHECK-NEXT: selp.b32 %r25, %r3, %r23, %p3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r25; +; CHECK-NEXT: ret; + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN") + ret float %r +} + +; Float8E5M2 to f16: 1.0 +define half @from_f8e5m2_to_f16() { +; CHECK-LABEL: from_f8e5m2_to_f16( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b16 [func_retval0], 0x3C00; +; CHECK-NEXT: ret; + %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2") + ret half %r +} + +; Float8E5M2 to f64: 1.0 +define double @from_f8e5m2_to_f64() { +; CHECK-LABEL: from_f8e5m2_to_f64( +; CHECK: { +; CHECK-EMPTY: +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: st.param.b64 [func_retval0], 4607182418800017408; +; CHECK-NEXT: ret; + %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2") + ret double %r +} + +; Vector test: Float4E2M1FN <4 x i4> -> <4 x float> +define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) { +; CHECK-LABEL: fp4_to_f32_vec( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<13>; +; CHECK-NEXT: .reg .b32 %r<101>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %r1, [fp4_to_f32_vec_param_0+2]; +; CHECK-NEXT: shl.b32 %r2, %r1, 16; +; CHECK-NEXT: 
ld.param.b16 %r3, [fp4_to_f32_vec_param_0]; +; CHECK-NEXT: prmt.b32 %r4, %r3, 0, 0x7771U; +; CHECK-NEXT: shl.b32 %r5, %r4, 28; +; CHECK-NEXT: and.b32 %r6, %r5, -2147483648; +; CHECK-NEXT: and.b32 %r7, %r4, 1; +; CHECK-NEXT: clz.b32 %r8, %r7; +; CHECK-NEXT: sub.s32 %r9, 157, %r8; +; CHECK-NEXT: shl.b32 %r10, %r9, 23; +; CHECK-NEXT: or.b32 %r11, %r6, %r10; +; CHECK-NEXT: sub.s32 %r12, 31, %r8; +; CHECK-NEXT: mov.b32 %r13, 1; +; CHECK-NEXT: shl.b32 %r14, %r13, %r12; +; CHECK-NEXT: xor.b32 %r15, %r7, %r14; +; CHECK-NEXT: add.s32 %r16, %r8, -8; +; CHECK-NEXT: shl.b32 %r17, %r15, %r16; +; CHECK-NEXT: or.b32 %r18, %r11, %r17; +; CHECK-NEXT: bfe.u32 %r19, %r4, 1, 2; +; CHECK-NEXT: shl.b32 %r20, %r19, 23; +; CHECK-NEXT: or.b32 %r21, %r20, %r6; +; CHECK-NEXT: shl.b32 %r22, %r7, 22; +; CHECK-NEXT: or.b32 %r23, %r21, %r22; +; CHECK-NEXT: add.s32 %r24, %r23, 1056964608; +; CHECK-NEXT: setp.ne.b32 %p1, %r7, 0; +; CHECK-NEXT: selp.b32 %r25, %r18, %r24, %p1; +; CHECK-NEXT: setp.eq.b32 %p2, %r19, 0; +; CHECK-NEXT: selp.b32 %r26, %r25, %r24, %p2; +; CHECK-NEXT: or.b32 %r27, %r19, %r7; +; CHECK-NEXT: setp.eq.b32 %p3, %r27, 0; +; CHECK-NEXT: selp.b32 %r28, %r6, %r26, %p3; +; CHECK-NEXT: prmt.b32 %r29, %r3, 0, 0x7770U; +; CHECK-NEXT: shl.b32 %r30, %r29, 28; +; CHECK-NEXT: and.b32 %r31, %r30, -2147483648; +; CHECK-NEXT: and.b32 %r32, %r29, 1; +; CHECK-NEXT: clz.b32 %r33, %r32; +; CHECK-NEXT: sub.s32 %r34, 157, %r33; +; CHECK-NEXT: shl.b32 %r35, %r34, 23; +; CHECK-NEXT: or.b32 %r36, %r31, %r35; +; CHECK-NEXT: sub.s32 %r37, 31, %r33; +; CHECK-NEXT: shl.b32 %r38, %r13, %r37; +; CHECK-NEXT: xor.b32 %r39, %r32, %r38; +; CHECK-NEXT: add.s32 %r40, %r33, -8; +; CHECK-NEXT: shl.b32 %r41, %r39, %r40; +; CHECK-NEXT: or.b32 %r42, %r36, %r41; +; CHECK-NEXT: bfe.u32 %r43, %r29, 1, 2; +; CHECK-NEXT: shl.b32 %r44, %r43, 23; +; CHECK-NEXT: or.b32 %r45, %r44, %r31; +; CHECK-NEXT: shl.b32 %r46, %r32, 22; +; CHECK-NEXT: or.b32 %r47, %r45, %r46; +; CHECK-NEXT: add.s32 %r48, %r47, 1056964608; +; CHECK-NEXT: 
setp.ne.b32 %p4, %r32, 0; +; CHECK-NEXT: selp.b32 %r49, %r42, %r48, %p4; +; CHECK-NEXT: setp.eq.b32 %p5, %r43, 0; +; CHECK-NEXT: selp.b32 %r50, %r49, %r48, %p5; +; CHECK-NEXT: or.b32 %r51, %r43, %r32; +; CHECK-NEXT: setp.eq.b32 %p6, %r51, 0; +; CHECK-NEXT: selp.b32 %r52, %r31, %r50, %p6; +; CHECK-NEXT: prmt.b32 %r53, %r2, 0, 0x7773U; +; CHECK-NEXT: shl.b32 %r54, %r53, 28; +; CHECK-NEXT: and.b32 %r55, %r54, -2147483648; +; CHECK-NEXT: and.b32 %r56, %r53, 1; +; CHECK-NEXT: clz.b32 %r57, %r56; +; CHECK-NEXT: sub.s32 %r58, 157, %r57; +; CHECK-NEXT: shl.b32 %r59, %r58, 23; +; CHECK-NEXT: or.b32 %r60, %r55, %r59; +; CHECK-NEXT: sub.s32 %r61, 31, %r57; +; CHECK-NEXT: shl.b32 %r62, %r13, %r61; +; CHECK-NEXT: xor.b32 %r63, %r56, %r62; +; CHECK-NEXT: add.s32 %r64, %r57, -8; +; CHECK-NEXT: shl.b32 %r65, %r63, %r64; +; CHECK-NEXT: or.b32 %r66, %r60, %r65; +; CHECK-NEXT: bfe.u32 %r67, %r53, 1, 2; +; CHECK-NEXT: shl.b32 %r68, %r67, 23; +; CHECK-NEXT: or.b32 %r69, %r68, %r55; +; CHECK-NEXT: shl.b32 %r70, %r56, 22; +; CHECK-NEXT: or.b32 %r71, %r69, %r70; +; CHECK-NEXT: add.s32 %r72, %r71, 1056964608; +; CHECK-NEXT: setp.ne.b32 %p7, %r56, 0; +; CHECK-NEXT: selp.b32 %r73, %r66, %r72, %p7; +; CHECK-NEXT: setp.eq.b32 %p8, %r67, 0; +; CHECK-NEXT: selp.b32 %r74, %r73, %r72, %p8; +; CHECK-NEXT: or.b32 %r75, %r67, %r56; +; CHECK-NEXT: setp.eq.b32 %p9, %r75, 0; +; CHECK-NEXT: selp.b32 %r76, %r55, %r74, %p9; +; CHECK-NEXT: prmt.b32 %r77, %r2, 0, 0x7772U; +; CHECK-NEXT: shl.b32 %r78, %r77, 28; +; CHECK-NEXT: and.b32 %r79, %r78, -2147483648; +; CHECK-NEXT: and.b32 %r80, %r77, 1; +; CHECK-NEXT: clz.b32 %r81, %r80; +; CHECK-NEXT: sub.s32 %r82, 157, %r81; +; CHECK-NEXT: shl.b32 %r83, %r82, 23; +; CHECK-NEXT: or.b32 %r84, %r79, %r83; +; CHECK-NEXT: sub.s32 %r85, 31, %r81; +; CHECK-NEXT: shl.b32 %r86, %r13, %r85; +; CHECK-NEXT: xor.b32 %r87, %r80, %r86; +; CHECK-NEXT: add.s32 %r88, %r81, -8; +; CHECK-NEXT: shl.b32 %r89, %r87, %r88; +; CHECK-NEXT: or.b32 %r90, %r84, %r89; +; CHECK-NEXT: bfe.u32 
%r91, %r77, 1, 2; +; CHECK-NEXT: shl.b32 %r92, %r91, 23; +; CHECK-NEXT: or.b32 %r93, %r92, %r79; +; CHECK-NEXT: shl.b32 %r94, %r80, 22; +; CHECK-NEXT: or.b32 %r95, %r93, %r94; +; CHECK-NEXT: add.s32 %r96, %r95, 1056964608; +; CHECK-NEXT: setp.ne.b32 %p10, %r80, 0; +; CHECK-NEXT: selp.b32 %r97, %r90, %r96, %p10; +; CHECK-NEXT: setp.eq.b32 %p11, %r91, 0; +; CHECK-NEXT: selp.b32 %r98, %r97, %r96, %p11; +; CHECK-NEXT: or.b32 %r99, %r91, %r80; +; CHECK-NEXT: setp.eq.b32 %p12, %r99, 0; +; CHECK-NEXT: selp.b32 %r100, %r79, %r98, %p12; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r52, %r28, %r100, %r76}; +; CHECK-NEXT: ret; + %r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN") + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll b/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll new file mode 100644 index 0000000000000..e60fed64fd179 --- /dev/null +++ b/llvm/test/CodeGen/X86/arbitrary-fp-to-float.ll @@ -0,0 +1,727 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +; Test llvm.convert.from.arbitrary intrinsic expansion. 
+ +declare float @llvm.convert.from.arbitrary.fp.f32.i8(i8, metadata) +declare float @llvm.convert.from.arbitrary.fp.f32.i6(i6, metadata) +declare float @llvm.convert.from.arbitrary.fp.f32.i4(i4, metadata) +declare <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4>, metadata) + +declare half @llvm.convert.from.arbitrary.fp.f16.i8(i8, metadata) +declare double @llvm.convert.from.arbitrary.fp.f64.i8(i8, metadata) + +; Float8E5M2 +; Layout: sign(1) exp(5) mant(2), bias=15 +; Supports: Inf, NaN, signed zero, denormals + +; Float8E5M2 normal: 0_01111_00 = 1.0 +define float @from_f8e5m2_normal() { +; CHECK-LABEL: from_f8e5m2_normal: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 60, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 zero: 0_00000_00 = +0.0 +define float @from_f8e5m2_zero() { +; CHECK-LABEL: from_f8e5m2_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 negative zero: 1_00000_00 = -0.0 +define float @from_f8e5m2_neg_zero() { +; CHECK-LABEL: from_f8e5m2_neg_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [-0.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -128, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 denorm: 0_00000_01 = 2^(-16) +define float @from_f8e5m2_denorm() { +; CHECK-LABEL: from_f8e5m2_denorm: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.52587891E-5,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 +Inf: 0_11111_00 +define float @from_f8e5m2_inf() { +; CHECK-LABEL: from_f8e5m2_inf: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = 
[+Inf,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 124, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 NaN: 0_11111_01 +define float @from_f8e5m2_nan() { +; CHECK-LABEL: from_f8e5m2_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 max: 0_11110_11 = 57344 +define float @from_f8e5m2_max() { +; CHECK-LABEL: from_f8e5m2_max: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.7344E+4,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 123, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 negative: 1_01111_00 = -1.0 +define float @from_f8e5m2_neg() { +; CHECK-LABEL: from_f8e5m2_neg: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 -68, metadata !"Float8E5M2") + ret float %r +} + +; Float8E5M2 runtime arg test +define float @from_f8e5m2_dynamic(i8 %x) { +; CHECK-LABEL: from_f8e5m2_dynamic: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $3, %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: shll $21, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $-128, %eax +; CHECK-NEXT: shll $24, %eax +; CHECK-NEXT: shrl $2, %edi +; CHECK-NEXT: andl $31, %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll $23, %esi +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: leal 939524096(%rcx,%rsi), %esi +; CHECK-NEXT: bsrl %edx, %r8d +; CHECK-NEXT: movl %edx, %r9d +; CHECK-NEXT: btcl %r8d, %r9d +; CHECK-NEXT: xorl $31, %r8d +; CHECK-NEXT: leal -8(%r8), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r9d +; CHECK-NEXT: movl $142, %ecx +; CHECK-NEXT: subl %r8d, %ecx +; CHECK-NEXT: shll $23, %ecx +; 
CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl %r9d, %ecx +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: sete %dl +; CHECK-NEXT: setne %r8b +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %r9b +; CHECK-NEXT: testb %r8b, %r9b +; CHECK-NEXT: cmovel %esi, %ecx +; CHECK-NEXT: testb %dl, %r9b +; CHECK-NEXT: cmovnel %eax, %ecx +; CHECK-NEXT: orl $2139095040, %eax # imm = 0x7F800000 +; CHECK-NEXT: cmpl $31, %edi +; CHECK-NEXT: sete %sil +; CHECK-NEXT: testb %dl, %sil +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: testb %r8b, %sil +; CHECK-NEXT: movl $2143289344, %ecx # imm = 0x7FC00000 +; CHECK-NEXT: cmovel %eax, %ecx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E5M2") + ret float %r +} + +; Float8E4M3FN (NanOnly, NanEncoding=AllOnes) +; Layout: sign(1) exp(4) mant(3), maxExp=8, minExp=-6, bias=7 +; Only 0_1111_111 and 1_1111_111 are NaN; all other exp=15 values are finite. + +; Float8E4M3FN normal: 0_0111_000 = 1.0 +define float @from_f8e4m3fn_normal() { +; CHECK-LABEL: from_f8e4m3fn_normal: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 56, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN NaN: 0_1111_111 +define float @from_f8e4m3fn_nan() { +; CHECK-LABEL: from_f8e4m3fn_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 127, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN not-NaN: 0_1111_110 = 448 +; Despite exp=all-ones, this is a valid finite number (max value) +define float @from_f8e4m3fn_max() { +; CHECK-LABEL: from_f8e4m3fn_max: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.48E+2,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 126, metadata 
!"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN not-NaN: 0_1111_101 = 416 +; exp=all-ones but mant!=all-ones so this is finite +define float @from_f8e4m3fn_not_nan() { +; CHECK-LABEL: from_f8e4m3fn_not_nan: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.16E+2,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 125, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN zero: 0_0000_000 = +0.0 +define float @from_f8e4m3fn_zero() { +; CHECK-LABEL: from_f8e4m3fn_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 0, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN denorm: 0_0000_001 = 2^(-9) +define float @from_f8e4m3fn_denorm() { +; CHECK-LABEL: from_f8e4m3fn_denorm: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.953125E-3,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 1, metadata !"Float8E4M3FN") + ret float %r +} + +; Float8E4M3FN runtime arg test +define float @from_f8e4m3fn_dynamic(i8 %x) { +; CHECK-LABEL: from_f8e4m3fn_dynamic: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $7, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shll $20, %ecx +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $-128, %edx +; CHECK-NEXT: shll $24, %edx +; CHECK-NEXT: shrl $3, %edi +; CHECK-NEXT: andl $15, %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll $23, %esi +; CHECK-NEXT: orl %edx, %esi +; CHECK-NEXT: leal 1006632960(%rcx,%rsi), %esi +; CHECK-NEXT: bsrl %eax, %r8d +; CHECK-NEXT: movl %eax, %r9d +; CHECK-NEXT: btcl %r8d, %r9d +; CHECK-NEXT: xorl $31, %r8d +; CHECK-NEXT: leal -8(%r8), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r9d +; CHECK-NEXT: movl $149, %ecx +; CHECK-NEXT: subl %r8d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orl 
%r9d, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: sete %r8b +; CHECK-NEXT: setne %r9b +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %r10b +; CHECK-NEXT: testb %r9b, %r10b +; CHECK-NEXT: cmovel %esi, %ecx +; CHECK-NEXT: testb %r8b, %r10b +; CHECK-NEXT: cmovnel %edx, %ecx +; CHECK-NEXT: cmpl $7, %eax +; CHECK-NEXT: sete %al +; CHECK-NEXT: cmpl $15, %edi +; CHECK-NEXT: sete %dl +; CHECK-NEXT: testb %al, %dl +; CHECK-NEXT: movl $2143289344, %eax # imm = 0x7FC00000 +; CHECK-NEXT: cmovel %ecx, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i8(i8 %x, metadata !"Float8E4M3FN") + ret float %r +} + +; Float6E3M2FN (FiniteOnly) +; Layout: sign(1) exp(3) mant(2), bias=3, maxExp=4 +; No Inf, no NaN. All bit patterns are finite. + +; Float6E3M2FN normal: 0_011_00 = 1.0 +define float @from_f6e3m2fn_normal() { +; CHECK-LABEL: from_f6e3m2fn_normal: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 12, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN max: 0_111_11 = 28.0 +define float @from_f6e3m2fn_max() { +; CHECK-LABEL: from_f6e3m2fn_max: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.8E+1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN denorm: 0_000_01 = 0.0625 +define float @from_f6e3m2fn_denorm() { +; CHECK-LABEL: from_f6e3m2fn_denorm: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.25E-2,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN zero: 0_000_00 = +0.0 +define float @from_f6e3m2fn_zero() { +; CHECK-LABEL: from_f6e3m2fn_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %r = call 
float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN negative: 1_011_00 = -1.0 +define float @from_f6e3m2fn_neg() { +; CHECK-LABEL: from_f6e3m2fn_neg: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 -20, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E3M2FN runtime arg test +define float @from_f6e3m2fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e3m2fn_dynamic: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $3, %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: shll $21, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $-32, %eax +; CHECK-NEXT: shll $26, %eax +; CHECK-NEXT: shrl $2, %edi +; CHECK-NEXT: andl $7, %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll $23, %esi +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: leal 1040187392(%rcx,%rsi), %esi +; CHECK-NEXT: bsrl %edx, %r8d +; CHECK-NEXT: movl %edx, %r9d +; CHECK-NEXT: btcl %r8d, %r9d +; CHECK-NEXT: xorl $31, %r8d +; CHECK-NEXT: leal -8(%r8), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r9d +; CHECK-NEXT: movl $154, %ecx +; CHECK-NEXT: subl %r8d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl %r9d, %ecx +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: sete %dl +; CHECK-NEXT: setne %r8b +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: testb %r8b, %dil +; CHECK-NEXT: cmovel %esi, %ecx +; CHECK-NEXT: testb %dl, %dil +; CHECK-NEXT: cmovnel %eax, %ecx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E3M2FN") + ret float %r +} + +; Float6E2M3FN (FiniteOnly) +; Layout: sign(1) exp(2) mant(3), bias=1, maxExp=2 +; No Inf, no NaN. All bit patterns are finite. 
+ +; Float6E2M3FN normal: 0_01_000 = 1.0 +define float @from_f6e2m3fn_normal() { +; CHECK-LABEL: from_f6e2m3fn_normal: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 8, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN max: 0_11_111 = 7.5 +define float @from_f6e2m3fn_max() { +; CHECK-LABEL: from_f6e2m3fn_max: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [7.5E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 31, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN denorm: 0_00_001 = 0.125 +define float @from_f6e2m3fn_denorm() { +; CHECK-LABEL: from_f6e2m3fn_denorm: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.25E-1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 1, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN zero: 0_00_000 = +0.0 +define float @from_f6e2m3fn_zero() { +; CHECK-LABEL: from_f6e2m3fn_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 0, metadata !"Float6E2M3FN") + ret float %r +} + +; Float6E2M3FN runtime arg test +define float @from_f6e2m3fn_dynamic(i6 %x) { +; CHECK-LABEL: from_f6e2m3fn_dynamic: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $7, %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: shll $20, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $-32, %eax +; CHECK-NEXT: shll $26, %eax +; CHECK-NEXT: shrl $3, %edi +; CHECK-NEXT: andl $3, %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll $23, %esi +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi +; CHECK-NEXT: bsrl %edx, %r8d +; CHECK-NEXT: movl %edx, %r9d +; CHECK-NEXT: btcl %r8d, %r9d +; CHECK-NEXT: xorl $31, %r8d +; CHECK-NEXT: leal -8(%r8), %ecx +; 
CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r9d +; CHECK-NEXT: movl $155, %ecx +; CHECK-NEXT: subl %r8d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl %r9d, %ecx +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: sete %dl +; CHECK-NEXT: setne %r8b +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: testb %r8b, %dil +; CHECK-NEXT: cmovel %esi, %ecx +; CHECK-NEXT: testb %dl, %dil +; CHECK-NEXT: cmovnel %eax, %ecx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i6(i6 %x, metadata !"Float6E2M3FN") + ret float %r +} + +; Float4E2M1FN (FiniteOnly) +; Layout: sign(1) exp(2) mant(1), bias=1, maxExp=2 +; No Inf, no NaN. + +; Float4E2M1FN normal: 0_01_0 = 1.0 +define float @from_f4e2m1fn_normal() { +; CHECK-LABEL: from_f4e2m1fn_normal: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 2, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN denorm: 0_00_1 = 0.5 +define float @from_f4e2m1fn_denorm() { +; CHECK-LABEL: from_f4e2m1fn_denorm: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [5.0E-1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 1, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN max: 0_11_1 = 6.0 +define float @from_f4e2m1fn_max() { +; CHECK-LABEL: from_f4e2m1fn_max: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm0 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 7, metadata !"Float4E2M1FN") + ret float %r +} + +; Float4E2M1FN runtime arg test +define float @from_f4e2m1fn_dynamic(i4 %x) { +; CHECK-LABEL: from_f4e2m1fn_dynamic: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: shll $22, 
%ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andl $-8, %eax +; CHECK-NEXT: shll $28, %eax +; CHECK-NEXT: shrl %edi +; CHECK-NEXT: andl $3, %edi +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shll $23, %esi +; CHECK-NEXT: orl %eax, %esi +; CHECK-NEXT: leal 1056964608(%rcx,%rsi), %esi +; CHECK-NEXT: bsrl %edx, %r8d +; CHECK-NEXT: movl %edx, %r9d +; CHECK-NEXT: btcl %r8d, %r9d +; CHECK-NEXT: xorl $31, %r8d +; CHECK-NEXT: leal -8(%r8), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r9d +; CHECK-NEXT: movl $157, %ecx +; CHECK-NEXT: subl %r8d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: orl %r9d, %ecx +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: sete %dl +; CHECK-NEXT: setne %r8b +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: testb %r8b, %dil +; CHECK-NEXT: cmovel %esi, %ecx +; CHECK-NEXT: testb %dl, %dil +; CHECK-NEXT: cmovnel %eax, %ecx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: retq + %r = call float @llvm.convert.from.arbitrary.fp.f32.i4(i4 %x, metadata !"Float4E2M1FN") + ret float %r +} + +; Float8E5M2 to f16: 1.0 +define half @from_f8e5m2_to_f16() { +; CHECK-LABEL: from_f8e5m2_to_f16: +; CHECK: # %bb.0: +; CHECK-NEXT: pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: retq + %r = call half @llvm.convert.from.arbitrary.fp.f16.i8(i8 60, metadata !"Float8E5M2") + ret half %r +} + +; Float8E5M2 to f64: 1.0 +define double @from_f8e5m2_to_f64() { +; CHECK-LABEL: from_f8e5m2_to_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; CHECK-NEXT: retq + %r = call double @llvm.convert.from.arbitrary.fp.f64.i8(i8 60, metadata !"Float8E5M2") + ret double %r +} + +declare bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8, metadata) + +; Float8E5M2 to bf16: 1.0 +; bf16 has: sign(1) exp(8) mant(7), bias=127 +define bfloat @from_f8e5m2_to_bf16() { +; CHECK-LABEL: from_f8e5m2_to_bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: callq __truncsfbf2@PLT +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %r = call bfloat @llvm.convert.from.arbitrary.fp.bf16.i8(i8 60, metadata !"Float8E5M2") + ret bfloat %r +} + +; Vector test: Float4E2M1FN <4 x i4> -> <4 x float> +define <4 x float> @fp4_to_f32_vec(<4 x i4> %x) { +; CHECK-LABEL: fp4_to_f32_vec: +; CHECK: # %bb.0: +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3] +; CHECK-NEXT: movd %xmm1, %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: shll $22, %eax +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: andl $-8, %edx +; CHECK-NEXT: shll $28, %edx +; CHECK-NEXT: shrl %esi +; CHECK-NEXT: andl $3, %esi +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: leal 1056964608(%rax,%rcx), %r8d +; CHECK-NEXT: bsrl %edi, %r9d +; CHECK-NEXT: movl %edi, %r10d +; CHECK-NEXT: btcl %r9d, %r10d +; CHECK-NEXT: xorl $31, %r9d +; CHECK-NEXT: leal -8(%r9), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r10d +; CHECK-NEXT: movl $157, %eax +; CHECK-NEXT: movl $157, %ecx +; CHECK-NEXT: subl %r9d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orl %r10d, %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: setne %r9b +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: sete %sil +; CHECK-NEXT: testb %r9b, %sil +; CHECK-NEXT: cmovel %r8d, %ecx +; CHECK-NEXT: testb %dil, %sil +; CHECK-NEXT: cmovnel %edx, %ecx +; CHECK-NEXT: movd %ecx, %xmm1 +; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] +; CHECK-NEXT: movd %xmm2, %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: shll $22, %ecx +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: andl $-8, %edx +; CHECK-NEXT: shll $28, 
%edx +; CHECK-NEXT: shrl %esi +; CHECK-NEXT: andl $3, %esi +; CHECK-NEXT: movl %esi, %r8d +; CHECK-NEXT: shll $23, %r8d +; CHECK-NEXT: orl %edx, %r8d +; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d +; CHECK-NEXT: bsrl %edi, %r9d +; CHECK-NEXT: movl %edi, %r10d +; CHECK-NEXT: btcl %r9d, %r10d +; CHECK-NEXT: xorl $31, %r9d +; CHECK-NEXT: leal -8(%r9), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r10d +; CHECK-NEXT: movl $157, %ecx +; CHECK-NEXT: subl %r9d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orl %r10d, %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: setne %r9b +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: sete %sil +; CHECK-NEXT: testb %r9b, %sil +; CHECK-NEXT: cmovel %r8d, %ecx +; CHECK-NEXT: testb %dil, %sil +; CHECK-NEXT: cmovnel %edx, %ecx +; CHECK-NEXT: movd %ecx, %xmm2 +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: movd %xmm0, %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: shll $22, %ecx +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: andl $-8, %edx +; CHECK-NEXT: shll $28, %edx +; CHECK-NEXT: shrl %esi +; CHECK-NEXT: andl $3, %esi +; CHECK-NEXT: movl %esi, %r8d +; CHECK-NEXT: shll $23, %r8d +; CHECK-NEXT: orl %edx, %r8d +; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d +; CHECK-NEXT: bsrl %edi, %r9d +; CHECK-NEXT: movl %edi, %r10d +; CHECK-NEXT: btcl %r9d, %r10d +; CHECK-NEXT: xorl $31, %r9d +; CHECK-NEXT: leal -8(%r9), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r10d +; CHECK-NEXT: movl $157, %ecx +; CHECK-NEXT: subl %r9d, %ecx +; CHECK-NEXT: shll $23, %ecx +; CHECK-NEXT: orl %edx, %ecx +; CHECK-NEXT: orl %r10d, %ecx +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %dil +; CHECK-NEXT: setne %r9b +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: sete %sil +; CHECK-NEXT: testb %r9b, %sil +; CHECK-NEXT: cmovel 
%r8d, %ecx +; CHECK-NEXT: testb %dil, %sil +; CHECK-NEXT: cmovnel %edx, %ecx +; CHECK-NEXT: movd %ecx, %xmm1 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] +; CHECK-NEXT: movd %xmm0, %esi +; CHECK-NEXT: movl %esi, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: shll $22, %ecx +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: andl $-8, %edx +; CHECK-NEXT: shll $28, %edx +; CHECK-NEXT: shrl %esi +; CHECK-NEXT: andl $3, %esi +; CHECK-NEXT: movl %esi, %r8d +; CHECK-NEXT: shll $23, %r8d +; CHECK-NEXT: orl %edx, %r8d +; CHECK-NEXT: leal 1056964608(%rcx,%r8), %r8d +; CHECK-NEXT: bsrl %edi, %r9d +; CHECK-NEXT: movl %edi, %r10d +; CHECK-NEXT: btcl %r9d, %r10d +; CHECK-NEXT: xorl $31, %r9d +; CHECK-NEXT: leal -8(%r9), %ecx +; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: shll %cl, %r10d +; CHECK-NEXT: subl %r9d, %eax +; CHECK-NEXT: shll $23, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: orl %r10d, %eax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: sete %cl +; CHECK-NEXT: setne %dil +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: sete %sil +; CHECK-NEXT: testb %dil, %sil +; CHECK-NEXT: cmovel %r8d, %eax +; CHECK-NEXT: testb %cl, %sil +; CHECK-NEXT: cmovnel %edx, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: retq + %r = call <4 x float> @llvm.convert.from.arbitrary.fp.v4f32.v4i4(<4 x i4> %x, metadata !"Float4E2M1FN") + ret <4 x float> %r +}