diff --git a/include/LLVMSPIRVExtensions.inc b/include/LLVMSPIRVExtensions.inc index 8e2e990b84..136bb65f6f 100644 --- a/include/LLVMSPIRVExtensions.inc +++ b/include/LLVMSPIRVExtensions.inc @@ -85,3 +85,5 @@ EXT(SPV_INTEL_shader_atomic_bfloat16) EXT(SPV_EXT_float8) EXT(SPV_INTEL_predicated_io) EXT(SPV_INTEL_sigmoid) +EXT(SPV_INTEL_float4) +EXT(SPV_INTEL_fp_conversions) diff --git a/lib/SPIRV/SPIRVInternal.h b/lib/SPIRV/SPIRVInternal.h index 20a53254e8..b71ec14059 100644 --- a/lib/SPIRV/SPIRVInternal.h +++ b/lib/SPIRV/SPIRVInternal.h @@ -1049,6 +1049,7 @@ enum FPEncodingWrap { BF16 = FPEncoding::FPEncodingBFloat16KHR, E4M3 = FPEncoding::FPEncodingFloat8E4M3EXT, E5M2 = FPEncoding::FPEncodingFloat8E5M2EXT, + E2M1 = internal::FPEncodingFloat4E2M1INTEL, }; // Structure describing non-trivial conversions (FP8 and int4) @@ -1077,36 +1078,117 @@ typedef SPIRVMap FPConvertToEncodingMap; // clang-format off template <> inline void FPConvertToEncodingMap::init() { - // 8-bit conversions - add("ConvertE4M3ToFP16EXT", - {FPEncodingWrap::E4M3, FPEncodingWrap::IEEE754, OpFConvert}); - add("ConvertE5M2ToFP16EXT", - {FPEncodingWrap::E5M2, FPEncodingWrap::IEEE754, OpFConvert}); - add("ConvertE4M3ToBF16EXT", - {FPEncodingWrap::E4M3, FPEncodingWrap::BF16, OpFConvert}); - add("ConvertE5M2ToBF16EXT", - {FPEncodingWrap::E5M2, FPEncodingWrap::BF16, OpFConvert}); - add("ConvertFP16ToE4M3EXT", - {FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, OpFConvert}); - add("ConvertFP16ToE5M2EXT", - {FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, OpFConvert}); - add("ConvertBF16ToE4M3EXT", - {FPEncodingWrap::BF16, FPEncodingWrap::E4M3, OpFConvert}); - add("ConvertBF16ToE5M2EXT", - {FPEncodingWrap::BF16, FPEncodingWrap::E5M2, OpFConvert}); - - add("ConvertInt4ToE4M3INTEL", - {FPEncodingWrap::Integer, FPEncodingWrap::E4M3, OpConvertSToF}); - add("ConvertInt4ToE5M2INTEL", - {FPEncodingWrap::Integer, FPEncodingWrap::E5M2, OpConvertSToF}); - add("ConvertInt4ToFP16INTEL", - {FPEncodingWrap::Integer, 
FPEncodingWrap::IEEE754, OpConvertSToF}); - add("ConvertInt4ToBF16INTEL", - {FPEncodingWrap::Integer, FPEncodingWrap::BF16, OpConvertSToF}); - add("ConvertFP16ToInt4INTEL", - {FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, OpConvertFToS}); - add("ConvertBF16ToInt4INTEL", - {FPEncodingWrap::BF16, FPEncodingWrap::Integer, OpConvertFToS}); + // 4-bit conversions + add("ConvertE2M1ToE4M3INTEL", + {FPEncodingWrap::E2M1, FPEncodingWrap::E4M3, OpFConvert}); + add("ConvertE2M1ToE5M2INTEL", + {FPEncodingWrap::E2M1, FPEncodingWrap::E5M2, OpFConvert}); + add("ConvertE2M1ToFP16INTEL", + {FPEncodingWrap::E2M1, FPEncodingWrap::IEEE754, OpFConvert}); + add("ConvertE2M1ToBF16INTEL", + {FPEncodingWrap::E2M1, FPEncodingWrap::BF16, OpFConvert}); + + add("ConvertInt4ToE4M3INTEL", + {FPEncodingWrap::Integer, FPEncodingWrap::E4M3, OpConvertSToF}); + add("ConvertInt4ToE5M2INTEL", + {FPEncodingWrap::Integer, FPEncodingWrap::E5M2, OpConvertSToF}); + add("ConvertInt4ToFP16INTEL", + {FPEncodingWrap::Integer, FPEncodingWrap::IEEE754, OpConvertSToF}); + add("ConvertInt4ToBF16INTEL", + {FPEncodingWrap::Integer, FPEncodingWrap::BF16, OpConvertSToF}); + add("ConvertInt4ToInt8INTEL", + {FPEncodingWrap::Integer, FPEncodingWrap::Integer, OpSConvert}); + + add("ConvertFP16ToE2M1INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1, OpFConvert}); + add("ConvertBF16ToE2M1INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E2M1, OpFConvert}); + add("ConvertFP16ToInt4INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, OpConvertFToS}); + add("ConvertBF16ToInt4INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::Integer, OpConvertFToS}); + + // 8-bit conversions + add("ConvertE4M3ToFP16EXT", + {FPEncodingWrap::E4M3, FPEncodingWrap::IEEE754, OpFConvert}); + add("ConvertE5M2ToFP16EXT", + {FPEncodingWrap::E5M2, FPEncodingWrap::IEEE754, OpFConvert}); + add("ConvertE4M3ToBF16EXT", + {FPEncodingWrap::E4M3, FPEncodingWrap::BF16, OpFConvert}); + add("ConvertE5M2ToBF16EXT", + {FPEncodingWrap::E5M2, 
FPEncodingWrap::BF16, OpFConvert}); + add("ConvertFP16ToE4M3EXT", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, OpFConvert}); + add("ConvertFP16ToE5M2EXT", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, OpFConvert}); + add("ConvertBF16ToE4M3EXT", + {FPEncodingWrap::BF16, FPEncodingWrap::E4M3, OpFConvert}); + add("ConvertBF16ToE5M2EXT", + {FPEncodingWrap::BF16, FPEncodingWrap::E5M2, OpFConvert}); + + // SPV_INTEL_fp_conversions + add("ClampConvertFP16ToE2M1INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertBF16ToE2M1INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E2M1, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertFP16ToE4M3INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertBF16ToE4M3INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E4M3, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertFP16ToE5M2INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertBF16ToE5M2INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E5M2, + internal::OpClampConvertFToFINTEL}); + add("ClampConvertFP16ToInt4INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, + internal::OpClampConvertFToSINTEL}); + add("ClampConvertBF16ToInt4INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::Integer, + internal::OpClampConvertFToSINTEL}); + + add("StochasticRoundFP16ToE5M2INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, + internal::OpStochasticRoundFToFINTEL}); + add("StochasticRoundFP16ToE4M3INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, + internal::OpStochasticRoundFToFINTEL}); + add("StochasticRoundBF16ToE5M2INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E5M2, + internal::OpStochasticRoundFToFINTEL}); + add("StochasticRoundBF16ToE4M3INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E4M3, + internal::OpStochasticRoundFToFINTEL}); + 
add("StochasticRoundFP16ToE2M1INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E2M1, + internal::OpStochasticRoundFToFINTEL}); + add("StochasticRoundBF16ToE2M1INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E2M1, + internal::OpStochasticRoundFToFINTEL}); + add("ClampStochasticRoundFP16ToInt4INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::Integer, + internal::OpClampStochasticRoundFToSINTEL}); + add("ClampStochasticRoundBF16ToInt4INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::Integer, + internal::OpClampStochasticRoundFToSINTEL}); + + add("ClampStochasticRoundFP16ToE5M2INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E5M2, + internal::OpClampStochasticRoundFToFINTEL}); + add("ClampStochasticRoundFP16ToE4M3INTEL", + {FPEncodingWrap::IEEE754, FPEncodingWrap::E4M3, + internal::OpClampStochasticRoundFToFINTEL}); + add("ClampStochasticRoundBF16ToE5M2INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E5M2, + internal::OpClampStochasticRoundFToFINTEL}); + add("ClampStochasticRoundBF16ToE4M3INTEL", + {FPEncodingWrap::BF16, FPEncodingWrap::E4M3, + internal::OpClampStochasticRoundFToFINTEL}); } // clang-format on diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 70cfbef303..a3d9a1da80 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -297,8 +297,11 @@ std::optional SPIRVToLLVM::getAlignment(SPIRVValue *V) { Type *SPIRVToLLVM::transFPType(SPIRVType *T) { switch (T->getFloatBitWidth()) { + case 4: + // No LLVM IR counterpart for FP4 - map it to i4. + return Type::getIntNTy(*Context, 4); case 8: - // No LLVM IR counter part for FP8 - map it on i8 + // No LLVM IR counterpart for FP8 - map it to i8. 
return Type::getIntNTy(*Context, 8); case 16: if (T->isTypeFloat(16, FPEncodingBFloat16KHR)) @@ -1064,11 +1067,12 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, return FPEncodingWrap::IEEE754; }; - auto IsFP8Encoding = [](FPEncodingWrap Encoding) -> bool { - return Encoding == FPEncodingWrap::E4M3 || Encoding == FPEncodingWrap::E5M2; + auto IsFP4OrFP8Encoding = [](FPEncodingWrap Encoding) -> bool { + return Encoding == FPEncodingWrap::E4M3 || + Encoding == FPEncodingWrap::E5M2 || Encoding == FPEncodingWrap::E2M1; }; - switch (BC->getOpCode()) { + switch (static_cast(BC->getOpCode())) { case OpPtrCastToGeneric: case OpGenericCastToPtr: case OpPtrCastToCrossWorkgroupINTEL: @@ -1089,6 +1093,11 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, case OpUConvert: CO = IsExt ? Instruction::ZExt : Instruction::Trunc; break; + case internal::OpClampConvertFToFINTEL: + case internal::OpClampConvertFToSINTEL: + case internal::OpStochasticRoundFToFINTEL: + case internal::OpClampStochasticRoundFToFINTEL: + case internal::OpClampStochasticRoundFToSINTEL: case OpConvertSToF: case OpConvertFToS: case OpConvertUToF: @@ -1113,7 +1122,7 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, FPEncodingWrap SrcEnc = GetEncodingAndUpdateType(SPVSrcTy); FPEncodingWrap DstEnc = GetEncodingAndUpdateType(SPVDstTy); - if (IsFP8Encoding(SrcEnc) || IsFP8Encoding(DstEnc) || + if (IsFP4OrFP8Encoding(SrcEnc) || IsFP4OrFP8Encoding(DstEnc) || SPVSrcTy->isTypeInt(4) || SPVDstTy->isTypeInt(4)) { FPConversionDesc FPDesc = {SrcEnc, DstEnc, BC->getOpCode()}; auto Conv = SPIRV::FPConvertToEncodingMap::rmap(FPDesc); @@ -1123,13 +1132,47 @@ Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, std::string BuiltinName = kSPIRVName::InternalBuiltinPrefix + std::string(Conv); BuiltinFuncMangleInfo Info; - std::string MangledName = mangleBuiltin(BuiltinName, OpsTys, &Info); + std::string MangledName; + // Translate additional Ops for stochastic 
conversions. + if (OC == internal::OpStochasticRoundFToFINTEL || + OC == internal::OpClampStochasticRoundFToFINTEL || + OC == internal::OpClampStochasticRoundFToSINTEL) { + // Seed. + Ops.emplace_back(transValue(SPVOps[1], F, BB, true)); + OpsTys.emplace_back(Ops[1]->getType()); + constexpr unsigned MaxOpsSize = 3; + if (SPVOps.size() == MaxOpsSize) { + // New Seed. + Ops.emplace_back(transValue(SPVOps[2], F, BB, true)); + + // The following mess is needed to create a function with correct + // mangling. + SPIRVType *PtrTy = SPVOps[2]->getType(); + const unsigned AS = + SPIRSPIRVAddrSpaceMap::rmap(PtrTy->getPointerStorageClass()); + Type *ElementTy = transType(PtrTy->getPointerElementType()); + OpsTys.emplace_back(TypedPointerType::get(ElementTy, AS)); + MangledName = mangleBuiltin(BuiltinName, OpsTys, &Info); + // But to create function itself we need untyped pointer type. + OpsTys[2] = opaquifyType(OpsTys[2]); + } + } + + if (MangledName.empty()) + MangledName = mangleBuiltin(BuiltinName, OpsTys, &Info); FunctionType *FTy = FunctionType::get(Dst, OpsTys, false); FunctionCallee Func = M->getOrInsertFunction(MangledName, FTy); return CallInst::Create(Func, Ops, "", BB); } } + // These conversions can be done without __builtin_spirv prefixed functions + // as their operand and result types have native representation in LLVM IR. + if (OC == internal::OpClampConvertFToFINTEL || + OC == internal::OpStochasticRoundFToFINTEL || + OC == internal::OpClampStochasticRoundFToFINTEL) + return mapValue(BV, transSPIRVBuiltinFromInst( + static_cast(BV), BB)); if (OC == OpFConvert) { CO = IsExt ? 
Instruction::FPExt : Instruction::FPTrunc; @@ -3053,7 +3096,11 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, if (OutMatrixElementTy->isTypeFloat(8, FPEncodingFloat8E4M3EXT) || OutMatrixElementTy->isTypeFloat(8, FPEncodingFloat8E5M2EXT) || InMatrixElementTy->isTypeFloat(8, FPEncodingFloat8E4M3EXT) || - InMatrixElementTy->isTypeFloat(8, FPEncodingFloat8E5M2EXT)) + InMatrixElementTy->isTypeFloat(8, FPEncodingFloat8E5M2EXT) || + OutMatrixElementTy->isTypeFloat( + 4, internal::FPEncodingFloat4E2M1INTEL) || + InMatrixElementTy->isTypeFloat(4, + internal::FPEncodingFloat4E2M1INTEL)) Inst = transConvertInst(BV, F, BB); else Inst = transSPIRVBuiltinFromInst(BI, BB); @@ -3062,6 +3109,8 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, } return mapValue(BV, Inst); } + if (isIntelCvtOpCode(OC)) + return mapValue(BV, transConvertInst(BV, F, BB)); return mapValue( BV, transSPIRVBuiltinFromInst(static_cast(BV), BB)); } @@ -3878,6 +3927,11 @@ Instruction *SPIRVToLLVM::transSPIRVBuiltinFromInst(SPIRVInstruction *BI, case internal::OpTaskSequenceCreateINTEL: case internal::OpConvertHandleToImageINTEL: case internal::OpConvertHandleToSampledImageINTEL: + case internal::OpClampConvertFToFINTEL: + case internal::OpClampConvertFToSINTEL: + case internal::OpStochasticRoundFToFINTEL: + case internal::OpClampStochasticRoundFToFINTEL: + case internal::OpClampStochasticRoundFToSINTEL: AddRetTypePostfix = true; break; default: { diff --git a/lib/SPIRV/SPIRVToOCL.cpp b/lib/SPIRV/SPIRVToOCL.cpp index e2e36bc8b7..7be3c8a2b3 100644 --- a/lib/SPIRV/SPIRVToOCL.cpp +++ b/lib/SPIRV/SPIRVToOCL.cpp @@ -247,6 +247,10 @@ void SPIRVToOCLBase::visitCastInst(CastInst &Cast) { DstVecTy->getScalarSizeInBits() == 1) return; + // We don't have OpenCL builtins for 4-bit conversions. 
+ if (DstVecTy->getScalarSizeInBits() == 4 || SrcTy->getScalarSizeInBits() == 4) + return; + // Assemble built-in name -> convert_gentypeN std::string CastBuiltInName(kOCLBuiltinName::ConvertPrefix); // Check if this is 'floating point -> unsigned integer' cast diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index b5e03ddea3..84cdb5bbb0 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -918,9 +918,10 @@ SPIRVFunction *LLVMToSPIRVBase::transFunctionDecl(Function *F) { // Proper checks for the required extensions will be done during TypeFloat // generation. if (!BM->isAllowedToUseExtension(ExtensionID::SPV_EXT_float8) && - !BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_int4)) { + !BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_int4) && + !BM->isAllowedToUseExtension(ExtensionID::SPV_INTEL_float4)) { std::string ErrorStr = - "One of the following extensions: SPV_EXT_float8, " + "One of the following extensions: SPV_EXT_float8, SPV_INTEL_float4, " "SPV_INTEL_int4 should be enabled to process " "conversion builtins"; getErrorLog().checkError(false, SPIRVEC_RequiresExtension, F, ErrorStr); @@ -5506,6 +5507,49 @@ SPIRVValue *LLVMToSPIRVBase::transCallInst(CallInst *CI, SPIRVBasicBlock *BB) { return transDirectCallInst(CI, BB); } +// Helper function to process mini-float or int4 types for FP conversions. +// Processes type width, packing, and creates the appropriate SPIRV type. +// Returns the SPIRV type and outputs vector size information. +static SPIRVType * +processMiniFPOrInt4Type(Type *LLVMTy, FPEncodingWrap Encoding, + std::function GetScalarTy, + SPIRVModule *BM, unsigned &OutVecSize) { + Type *ScalarTy = GetScalarTy(LLVMTy); + unsigned TyWidth = cast(ScalarTy)->getBitWidth(); + unsigned VecSize = 0; + + if (TyWidth == 32) { + // Int4 or FP4 packed in 32-bit integer, change type and vector size. 
+ assert((Encoding == FPEncodingWrap::E2M1 || + Encoding == FPEncodingWrap::Integer) && + "Unknown FP encoding"); + assert(!isLLVMCooperativeMatrixType(LLVMTy) && + "FP4 and Int4 matrices must not be packed"); + VecSize = 8; + TyWidth = 4; + } else if (TyWidth == 8 && (Encoding == FPEncodingWrap::E2M1 || + Encoding == FPEncodingWrap::Integer)) { + assert(!isLLVMCooperativeMatrixType(LLVMTy) && + "FP4 and Int4 matrices must not be packed"); + // Int4 or FP4 packed in 8-bit integer, change type and vector size. + VecSize = 2; + TyWidth = 4; + } else { + if (LLVMTy->isVectorTy()) + VecSize = cast(LLVMTy)->getElementCount().getFixedValue(); + } + + SPIRVType *Ty; + if (Encoding == FPEncodingWrap::Integer) { + Ty = BM->addIntegerType(TyWidth); + } else { + Ty = BM->addFloatType(TyWidth, Encoding); + } + + OutVecSize = VecSize; + return Ty; +} + SPIRVValue *LLVMToSPIRVBase::transDirectCallInst(CallInst *CI, SPIRVBasicBlock *BB) { SPIRVExtInstSetKind ExtSetKind = SPIRVEIS_Count; @@ -5529,7 +5573,7 @@ SPIRVValue *LLVMToSPIRVBase::transDirectCallInst(CallInst *CI, // Logic of the code below is described in // docs/SPIRVMiniFloatsRepresentationInLLVM.rst // A quick recap of the document: - // For FP8 types (which don't have appropriate counterparts in LLVM) + // For FP4 and FP8 types (which don't have appropriate counterparts in LLVM) // the translator expect to see external function calls with __builtin_spirv // prefix, names of the functions encode the used in the conversion // FP types and will be used by the translator for proper TypeFloat values @@ -5567,42 +5611,17 @@ SPIRVValue *LLVMToSPIRVBase::transDirectCallInst(CallInst *CI, SPIRVValue *SrcOp = transValue(Src, BB); - // TODO: unify SrcTy and DstTy processing into a single routine. if (!SrcTy) { - // Src type is 'mini' float or int4 - Type *SrcScalarTy = GetScalarTy(LLVMSrcTy); - unsigned SrcTyWidth = cast(SrcScalarTy)->getBitWidth(); + // Src type is 'mini' float or int4. 
unsigned SrcVecSize = 0; - if (SrcTyWidth == 32) { - // Int4 packed in 32-bit integer, change Src type and vector size - assert(FPDesc.SrcEncoding == FPEncodingWrap::Integer && - "Unknown FP encoding"); - assert(!isLLVMCooperativeMatrixType(LLVMSrcTy) && - "Int4 matrices must not be packed"); - SrcVecSize = 8; - SrcTyWidth = 4; - } else if (SrcTyWidth == 8 && - FPDesc.SrcEncoding == FPEncodingWrap::Integer) { - assert(!isLLVMCooperativeMatrixType(LLVMSrcTy) && - "Int4 matrices must not be packed"); - // Int4 packed in 8-bit integer, change Src type and vector size - SrcVecSize = 2; - SrcTyWidth = 4; - } else { - if (LLVMSrcTy->isVectorTy()) - SrcVecSize = - cast(LLVMSrcTy)->getElementCount().getFixedValue(); - } - if (FPDesc.SrcEncoding == FPEncodingWrap::Integer) { - SrcTy = BM->addIntegerType(SrcTyWidth); - } else { - SrcTy = BM->addFloatType(SrcTyWidth, FPDesc.SrcEncoding); - } + SrcTy = processMiniFPOrInt4Type(LLVMSrcTy, FPDesc.SrcEncoding, + GetScalarTy, BM, SrcVecSize); + if (SrcVecSize > 0) SrcTy = BM->addVectorType(SrcTy, SrcVecSize); if (isLLVMCooperativeMatrixType(LLVMSrcTy)) { - // Create FP8 matrix with a new type and insert a bitcast. + // Create FP4/FP8 matrix with a new type and insert a bitcast. SrcTy = BM->addCooperativeMatrixKHRType( SrcTy, static_cast(transType(LLVMSrcTy)) @@ -5610,41 +5629,15 @@ SPIRVValue *LLVMToSPIRVBase::transDirectCallInst(CallInst *CI, SrcOp = BM->addUnaryInst(OpBitcast, SrcTy, SrcOp, BB); } else if (FPDesc.SrcEncoding != FPEncodingWrap::Integer || (SrcTy->isTypeVector() && !LLVMSrcTy->isVectorTy())) { - // Create bitcast for FP8 and packed Int4. + // Create bitcast for FP4, FP8 and packed Int4. SrcOp = BM->addUnaryInst(OpBitcast, SrcTy, SrcOp, BB); } } if (!DstTy) { - // Dst type is 'mini' float or int4 - Type *DstScalarTy = GetScalarTy(LLVMDstTy); - unsigned DstTyWidth = cast(DstScalarTy)->getBitWidth(); + // Dst type is 'mini' float or int4. 
unsigned DstVecSize = 0; - if (DstTyWidth == 32) { - // Int4 packed in 32-bit integer, change Dst type and vector size - assert(FPDesc.DstEncoding == FPEncodingWrap::Integer && - "Unknown FP encoding"); - assert(!isLLVMCooperativeMatrixType(LLVMDstTy) && - "Int4 matrices must not be packed"); - DstVecSize = 8; - DstTyWidth = 4; - } else if (DstTyWidth == 8 && - FPDesc.DstEncoding == FPEncodingWrap::Integer) { - assert(!isLLVMCooperativeMatrixType(LLVMDstTy) && - "Int4 matrices must not be packed"); - // Int4 packed in 8-bit integer, change Dst type and vector size - DstVecSize = 2; - DstTyWidth = 4; - } else { - // Currently unused in SYCL - if (LLVMDstTy->isVectorTy()) - DstVecSize = - cast(LLVMDstTy)->getElementCount().getFixedValue(); - } - if (FPDesc.DstEncoding == FPEncodingWrap::Integer) { - DstTy = BM->addIntegerType(DstTyWidth); - } else { - DstTy = BM->addFloatType(DstTyWidth, FPDesc.DstEncoding); - } + DstTy = processMiniFPOrInt4Type(LLVMDstTy, FPDesc.DstEncoding, + GetScalarTy, BM, DstVecSize); if (isLLVMCooperativeMatrixType(LLVMDstTy)) // Create FP8 matrix with a new type. 
diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index fc36a375b0..89b4dde3e8 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -4499,7 +4499,7 @@ _SPIRV_OP(PredicatedStore, false, 4, true) #undef _SPIRV_OP template class SPIRVFSigmoidINTELInstBase : public SPIRVUnaryInst { -protected: +public: SPIRVCapVec getRequiredCapability() const override { return getVec(internal::CapabilitySigmoidINTEL); } @@ -4556,5 +4556,26 @@ template class SPIRVFSigmoidINTELInstBase : public SPIRVUnaryInst { typedef SPIRVFSigmoidINTELInstBase SPIRV##x; _SPIRV_OP(FSigmoidINTEL) #undef _SPIRV_OP + +class SPIRVFPConversionINTELInstBase : public SPIRVInstTemplateBase { +public: + SPIRVCapVec getRequiredCapability() const override { + return getVec(internal::CapabilityFloatConversionsINTEL); + } + + std::optional getRequiredExtension() const override { + return ExtensionID::SPV_INTEL_fp_conversions; + } +}; +#define _SPIRV_OP(x, ...) 
\ + typedef SPIRVInstTemplate \ + SPIRV##x##INTEL; +_SPIRV_OP(ClampConvertFToF, true, 4, false) +_SPIRV_OP(ClampConvertFToS, true, 4, false) +_SPIRV_OP(StochasticRoundFToF, true, 5, true) +_SPIRV_OP(ClampStochasticRoundFToF, true, 5, true) +_SPIRV_OP(ClampStochasticRoundFToS, true, 5, true) +#undef _SPIRV_OP } // namespace SPIRV #endif // SPIRV_LIBSPIRV_SPIRVINSTRUCTION_H diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index dc3988d074..63cf086713 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -695,6 +695,10 @@ template <> inline void SPIRVMap::init() { add(CapabilityFloat8CooperativeMatrixEXT, "Float8CooperativeMatrixEXT"); add(internal::CapabilityPredicatedIOINTEL, "PredicatedIOINTEL"); add(internal::CapabilitySigmoidINTEL, "SigmoidINTEL"); + add(internal::CapabilityFloat4E2M1INTEL, "Float4E2M1INTEL"); + add(internal::CapabilityFloat4E2M1CooperativeMatrixINTEL, + "Float4E2M1CooperativeMatrixINTEL"); + add(internal::CapabilityFloatConversionsINTEL, "FloatConversionsINTEL"); } SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h index 3699edc2af..f441dd7b41 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -122,6 +122,14 @@ inline bool isCvtOpCode(Op OpCode) { OpCode == OpCrossWorkgroupCastToPtrINTEL; } +inline bool isIntelCvtOpCode(Op OpCode) { + return OpCode == internal::OpClampConvertFToFINTEL || + OpCode == internal::OpClampConvertFToSINTEL || + OpCode == internal::OpStochasticRoundFToFINTEL || + OpCode == internal::OpClampStochasticRoundFToFINTEL || + OpCode == internal::OpClampStochasticRoundFToSINTEL; +} + inline bool isCvtToUnsignedOpCode(Op OpCode) { return OpCode == OpConvertFToU || OpCode == OpUConvert || OpCode == OpSatConvertSToU; diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 
33efa22ee7..13de704429 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -46,3 +46,13 @@ _SPIRV_OP_INTERNAL(PredicatedLoadINTEL, _SPIRV_OP_INTERNAL(PredicatedStoreINTEL, internal::OpPredicatedStoreINTEL) _SPIRV_OP_INTERNAL(FSigmoidINTEL, internal::FSigmoidINTEL) +_SPIRV_OP_INTERNAL(ClampConvertFToFINTEL, + internal::OpClampConvertFToFINTEL) +_SPIRV_OP_INTERNAL(StochasticRoundFToFINTEL, + internal::OpStochasticRoundFToFINTEL) +_SPIRV_OP_INTERNAL(ClampStochasticRoundFToFINTEL, + internal::OpClampStochasticRoundFToFINTEL) +_SPIRV_OP_INTERNAL(ClampConvertFToSINTEL, + internal::OpClampConvertFToSINTEL) +_SPIRV_OP_INTERNAL(ClampStochasticRoundFToSINTEL, + internal::OpClampStochasticRoundFToSINTEL) diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index d7a520a345..bd6f91615f 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -240,6 +240,8 @@ class SPIRVTypeFloat : public SPIRVType { if (isTypeFloat(8, FPEncodingFloat8E4M3EXT) || isTypeFloat(8, FPEncodingFloat8E5M2EXT)) return ExtensionID::SPV_EXT_float8; + if (isTypeFloat(4, internal::FPEncodingFloat4E2M1INTEL)) + return ExtensionID::SPV_INTEL_float4; return {}; } @@ -258,6 +260,8 @@ class SPIRVTypeFloat : public SPIRVType { } else if (isTypeFloat(8, FPEncodingFloat8E4M3EXT) || isTypeFloat(8, FPEncodingFloat8E5M2EXT)) { CV.push_back(CapabilityFloat8EXT); + } else if (isTypeFloat(4, internal::FPEncodingFloat4E2M1INTEL)) { + CV.push_back(internal::CapabilityFloat4E2M1INTEL); } return CV; } @@ -281,14 +285,16 @@ class SPIRVTypeFloat : public SPIRVType { void validate() const override { SPIRVEntry::validate(); - assert( - (BitWidth == 8 || BitWidth == 16 || BitWidth == 32 || BitWidth == 64) && - "Invalid bit width"); + assert((BitWidth == 4 || BitWidth == 8 || BitWidth == 16 || + BitWidth == 32 || BitWidth == 64) && + "Invalid bit width"); assert( (FloatingPointEncoding == FPEncodingMax || (BitWidth == 16 
&& FloatingPointEncoding == FPEncodingBFloat16KHR) || (BitWidth == 8 && FloatingPointEncoding == FPEncodingFloat8E4M3EXT) || - (BitWidth == 8 && FloatingPointEncoding == FPEncodingFloat8E5M2EXT)) && + (BitWidth == 8 && FloatingPointEncoding == FPEncodingFloat8E5M2EXT) || + (BitWidth == 4 && + FloatingPointEncoding == internal::FPEncodingFloat4E2M1INTEL)) && "Invalid floating point encoding"); } @@ -1263,6 +1269,8 @@ class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { else if (CompType->isTypeFloat(8, FPEncodingFloat8E4M3EXT) || CompType->isTypeFloat(8, FPEncodingFloat8E5M2EXT)) CV.push_back(CapabilityFloat8CooperativeMatrixEXT); + else if (CompType->isTypeFloat(4, internal::FPEncodingFloat4E2M1INTEL)) + CV.push_back(internal::CapabilityFloat4E2M1CooperativeMatrixINTEL); return CV; } diff --git a/lib/SPIRV/libSPIRV/spirv_internal.hpp b/lib/SPIRV/libSPIRV/spirv_internal.hpp index 6bdfcc38f5..69cebea7af 100644 --- a/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -74,12 +74,17 @@ enum InternalOp { IOpCooperativeMatrixLoadCheckedINTEL = 6193, IOpCooperativeMatrixStoreCheckedINTEL = 6194, IOpCooperativeMatrixConstructCheckedINTEL = 6195, + IOpTypeTaskSequenceINTEL = 6199, + IOpClampConvertFToFINTEL = 6216, + IOpStochasticRoundFToFINTEL = 6217, + IOpClampStochasticRoundFToFINTEL = 6218, + IOpClampStochasticRoundFToSINTEL = 6219, IOpCooperativeMatrixLoadOffsetINTEL = 6239, IOpCooperativeMatrixStoreOffsetINTEL = 6240, IOpPredicatedLoadINTEL = 6258, IOpPredicatedStoreINTEL = 6259, IOpJointMatrixWorkItemLengthINTEL = 6410, - IOpTypeTaskSequenceINTEL = 6199, + IOpClampConvertFToSINTEL = 6424, IOpMaskedGatherINTEL = 6428, IOpMaskedScatterINTEL = 6429, IOpJointMatrixGetElementCoordINTEL = 6440, @@ -110,6 +115,9 @@ enum InternalCapability { ICapabilityTaskSequenceINTEL = 6162, ICapabilitySigmoidINTEL = 6167, ICapabilityCooperativeMatrixCheckedInstructionsINTEL = 6192, + ICapabilityFloat4E2M1INTEL = 6212, + 
ICapabilityFloat4E2M1CooperativeMatrixINTEL = 6213, + ICapabilityFloatConversionsINTEL = 6215, ICapabilityBFloat16ArithmeticINTEL = 6226, ICapabilityCooperativeMatrixOffsetInstructionsINTEL = 6238, ICapabilityAtomicBFloat16AddINTEL = 6255, @@ -151,6 +159,11 @@ enum InternalJointMatrixCTI { PackedInt4 = 4 }; +enum InternalFPEncoding { + FPEncodingFloat4E2M1INTEL = 6214, + FPEncodingMax = 0x7fffffff, +}; + enum InternalBuiltIn { IBuiltInSubDeviceIDINTEL = 6135, IBuiltInGlobalHWThreadIDINTEL = 6136, @@ -220,6 +233,15 @@ _SPIRV_OP(Op, PredicatedStoreINTEL) _SPIRV_OP(Capability, SigmoidINTEL) _SPIRV_OP(Op, FSigmoidINTEL) +_SPIRV_OP(Capability, Float4E2M1INTEL) +_SPIRV_OP(Capability, Float4E2M1CooperativeMatrixINTEL) + +_SPIRV_OP(Capability, FloatConversionsINTEL) +_SPIRV_OP(Op, ClampConvertFToFINTEL) +_SPIRV_OP(Op, ClampConvertFToSINTEL) +_SPIRV_OP(Op, StochasticRoundFToFINTEL) +_SPIRV_OP(Op, ClampStochasticRoundFToFINTEL) +_SPIRV_OP(Op, ClampStochasticRoundFToSINTEL) #undef _SPIRV_OP constexpr SourceLanguage SourceLanguagePython = diff --git a/test/extensions/SPV_INTEL_float4/conversions_packed.ll b/test/extensions/SPV_INTEL_float4/conversions_packed.ll new file mode 100644 index 0000000000..df2ea4afbc --- /dev/null +++ b/test/extensions/SPV_INTEL_float4/conversions_packed.ll @@ -0,0 +1,131 @@ +; This test checks that FP4 and Int4 packed conversions specified by +; __builtin_spirv_* external function calls are translated correctly. +; Not all of the instructions are tested here, only one for each of the +; following cases: +; 1. from packed FP4 to ... : +; a. packed in 32-bit +; b. packed in 8-bit +; 2. to packed FP4 from ... : +; a. packed in 32-bit +; b. 
packed in 8-bit + +; RUN: llvm-spirv %s -o %t.spv --spirv-ext=+SPV_EXT_float8,+SPV_INTEL_float4,+SPV_INTEL_int4,+SPV_KHR_bfloat16 +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r --spirv-target-env=SPV-IR +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability Float8EXT +; CHECK-SPIRV-DAG: Capability Float4E2M1INTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_float4" +; CHECK-SPIRV-DAG: Extension "SPV_EXT_float8" + +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf8_32:]] "fp4e2m1_hf8_32" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf8_8:]] "fp4e2m1_hf8_8" +; CHECK-SPIRV-DAG: Name [[#hf16_fp4e2m1_32:]] "hf16_fp4e2m1_32" +; CHECK-SPIRV-DAG: Name [[#hf16_fp4e2m1_8:]] "hf16_fp4e2m1_8" + +; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Int32Const:]] 1 + +; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 +; CHECK-SPIRV-DAG: TypeVector [[#Int8Vec8Ty:]] [[#Int8Ty]] 8 +; CHECK-SPIRV-DAG: TypeVector [[#Int8Vec2Ty:]] [[#Int8Ty]] 2 +; CHECK-SPIRV-DAG: Constant [[#Int8Ty]] [[#Int8Const:]] 1 + +; CHECK-SPIRV-DAG: TypeFloat [[#E2M1Ty:]] 4 6214 +; CHECK-SPIRV-DAG: TypeVector [[#E2M1Vec8Ty:]] [[#E2M1Ty]] 8 +; CHECK-SPIRV-DAG: TypeVector [[#E2M1Vec2Ty:]] [[#E2M1Ty]] 2 + +; CHECK-SPIRV-DAG: TypeFloat [[#Float8E4M3Ty:]] 8 4214 +; CHECK-SPIRV-DAG: TypeVector [[#Float8E4M3Vec8Ty:]] [[#Float8E4M3Ty]] 8 +; CHECK-SPIRV-DAG: TypeVector [[#Float8E4M3Vec2Ty:]] [[#Float8E4M3Ty]] 2 + +; CHECK-SPIRV-DAG: TypeFloat [[#HFloat16Ty:]] 16 {{$}} +; CHECK-SPIRV-DAG: TypeVector [[#HFloat16Vec8Ty:]] [[#HFloat16Ty]] 8 +; CHECK-SPIRV-DAG: TypeVector [[#HFloat16Vec2Ty:]] [[#HFloat16Ty]] 2 +; CHECK-SPIRV-DAG: Constant [[#HFloat16Ty]] [[#HFloat16Const:]] 15360 +; CHECK-SPIRV-DAG: ConstantComposite [[#HFloat16Vec8Ty]] [[#HFloat16Vec8Const:]] [[#HFloat16Const]] [[#HFloat16Const]] [[#HFloat16Const]] [[#HFloat16Const]] 
[[#HFloat16Const]] [[#HFloat16Const]] [[#HFloat16Const]] [[#HFloat16Const]] +; CHECK-SPIRV-DAG: ConstantComposite [[#HFloat16Vec2Ty]] [[#HFloat16Vec2Const:]] [[#HFloat16Const]] [[#HFloat16Const]] + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Packed in 32-bit integer + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf8_32]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Vec8Ty]] [[#Cast1:]] [[#Int32Const]] +; CHECK-SPIRV: FConvert [[#Float8E4M3Vec8Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8Vec8Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf8_32 +; CHECK-LLVM: %[[#Cast:]] = bitcast i32 1 to <8 x i4> +; CHECK-LLVM: %[[#Call:]] = call <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELDv8_i(<8 x i4> %[[#Cast]]) +; CHECK-LLVM: ret <8 x i8> %[[#Call]] + +define spir_func <8 x i8> @fp4e2m1_hf8_32() { +entry: + %0 = call spir_func <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELi(i32 1) + ret <8 x i8> %0 +} + +declare dso_local spir_func <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELi(i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_fp4e2m1_32]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1Vec8Ty]] [[#Conv:]] [[#HFloat16Vec8Const]] +; CHECK-SPIRV: Bitcast [[#Int32Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: hf16_fp4e2m1_32 +; CHECK-LLVM: %[[#Call:]] = call <8 x i4> @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half> splat (half 0xH3C00)) +; CHECK-LLVM: %[[#Cast:]] = bitcast <8 x i4> %[[#Call]] to i32 +; CHECK-LLVM: ret i32 %[[#Cast]] + +define spir_func i32 @hf16_fp4e2m1_32() { +entry: + %0 = call i32 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half> ) + ret i32 %0 +} + +declare dso_local spir_func i32 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half>) + +; Packed in 8-bit integer + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf8_8]] 
[[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Vec2Ty]] [[#Cast1:]] [[#Int8Const]] +; CHECK-SPIRV: FConvert [[#Float8E4M3Vec2Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8Vec2Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf8_8 +; CHECK-LLVM: %[[#Cast:]] = bitcast i8 1 to <2 x i4> +; CHECK-LLVM: %[[#Call:]] = call <2 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELDv2_i(<2 x i4> %[[#Cast]]) +; CHECK-LLVM: ret <2 x i8> %[[#Call]] + +define spir_func <2 x i8> @fp4e2m1_hf8_8() { +entry: + %0 = call spir_func <2 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELc(i8 1) + ret <2 x i8> %0 +} + +declare dso_local spir_func <2 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELc(i8) + +; CHECK-SPIRV: Function [[#]] [[#hf16_fp4e2m1_8]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1Vec2Ty]] [[#Conv:]] [[#HFloat16Vec2Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: hf16_fp4e2m1_8 +; CHECK-LLVM: %[[#Call:]] = call <2 x i4> @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv2_Dh(<2 x half> splat (half 0xH3C00)) +; CHECK-LLVM: %[[#Cast:]] = bitcast <2 x i4> %[[#Call]] to i8 +; CHECK-LLVM: ret i8 %[[#Cast]] + +define spir_func i8 @hf16_fp4e2m1_8() { +entry: + %0 = call i8 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv2_Dh(<2 x half> ) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv2_Dh(<2 x half>) diff --git a/test/extensions/SPV_INTEL_float4/conversions_scalar_vector.ll b/test/extensions/SPV_INTEL_float4/conversions_scalar_vector.ll new file mode 100644 index 0000000000..e783fad326 --- /dev/null +++ b/test/extensions/SPV_INTEL_float4/conversions_scalar_vector.ll @@ -0,0 +1,275 @@ +; This test checks if FP4 and FP8 scalar and vector conversions specified by +; __builtin_spirv_* external function calls are translated correctly.
It doesn't +; include Clamp*, Stochastic*, ClampStochastic* conversions (they are part of +; another test file). + +; RUN: llvm-spirv %s -o %t.spv --spirv-ext=+SPV_EXT_float8,+SPV_INTEL_float4,+SPV_INTEL_int4,+SPV_KHR_bfloat16 +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r --spirv-target-env=SPV-IR +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability Int4TypeINTEL +; CHECK-SPIRV-DAG: Capability Float8EXT + +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_int4" +; CHECK-SPIRV-DAG: Extension "SPV_EXT_float8" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_float4" + +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf8_scalar:]] "fp4e2m1_hf8_scalar" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf8_vector:]] "fp4e2m1_hf8_vector" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_bf8_scalar:]] "fp4e2m1_bf8_scalar" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_bf8_vector:]] "fp4e2m1_bf8_vector" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf16_scalar:]] "fp4e2m1_hf16_scalar" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_hf16_vector:]] "fp4e2m1_hf16_vector" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_bf16_scalar:]] "fp4e2m1_bf16_scalar" +; CHECK-SPIRV-DAG: Name [[#fp4e2m1_bf16_vector:]] "fp4e2m1_bf16_vector" + +; CHECK-SPIRV-DAG: Name [[#hf16_fp4e2m1_scalar:]] "hf16_fp4e2m1_scalar" +; CHECK-SPIRV-DAG: Name [[#hf16_fp4e2m1_vector:]] "hf16_fp4e2m1_vector" +; CHECK-SPIRV-DAG: Name [[#bf16_fp4e2m1_scalar:]] "bf16_fp4e2m1_scalar" +; CHECK-SPIRV-DAG: Name [[#bf16_fp4e2m1_vector:]] "bf16_fp4e2m1_vector" + +; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 +; CHECK-SPIRV-DAG: TypeVector [[#Int8VecTy:]] [[#Int8Ty]] 8 + +; CHECK-SPIRV-DAG: TypeInt [[#Int4Ty:]] 4 0 +; CHECK-SPIRV-DAG: TypeVector [[#Int4VecTy:]] [[#Int4Ty]] 8 +; CHECK-SPIRV-DAG: Constant [[#Int4Ty]] [[#Int4Const:]] 1 +; CHECK-SPIRV-DAG: ConstantComposite [[#Int4VecTy]] [[#Int4VecConst:]] [[#Int4Const]] [[#Int4Const]] [[#Int4Const]] [[#Int4Const]]
[[#Int4Const]] [[#Int4Const]] [[#Int4Const]] [[#Int4Const]] + +; CHECK-SPIRV-DAG: TypeFloat [[#E2M1Ty:]] 4 6214 +; CHECK-SPIRV-DAG: TypeVector [[#E2M1VecTy:]] [[#E2M1Ty]] 8 + +; CHECK-SPIRV-DAG: TypeFloat [[#HFloat8Ty:]] 8 4214 +; CHECK-SPIRV-DAG: TypeVector [[#HFloat8VecTy:]] [[#HFloat8Ty]] 8 + +; CHECK-SPIRV-DAG: TypeFloat [[#BFloat8Ty:]] 8 4215 +; CHECK-SPIRV-DAG: TypeVector [[#BFloat8VecTy:]] [[#BFloat8Ty]] 8 + +; CHECK-SPIRV-DAG: TypeFloat [[#HFloat16Ty:]] 16 {{$}} +; CHECK-SPIRV-DAG: TypeVector [[#HFloat16VecTy:]] [[#HFloat16Ty]] 8 +; CHECK-SPIRV-DAG: Constant [[#HFloat16Ty]] [[#HalfConst:]] 15360 +; CHECK-SPIRV-DAG: ConstantComposite [[#HFloat16VecTy]] [[#HalfVecConst:]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] [[#HalfConst]] + +; CHECK-SPIRV-DAG: TypeFloat [[#BFloat16Ty:]] 16 0 +; CHECK-SPIRV-DAG: TypeVector [[#BFloat16VecTy:]] [[#BFloat16Ty]] 8 +; CHECK-SPIRV-DAG: Constant [[#BFloat16Ty]] [[#BfloatConst:]] 16256 +; CHECK-SPIRV-DAG: ConstantComposite [[#BFloat16VecTy]] [[#BfloatVecConst:]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] [[#BfloatConst]] + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; The following tests are for 4-bit upconversions + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf8_scalar]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Ty]] [[#Cast1:]] [[#Int4Const]] +; CHECK-SPIRV: FConvert [[#HFloat8Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf8_scalar +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELi(i4 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @fp4e2m1_hf8_scalar() { +entry: + %0 = call i8 @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELi(i4 1) + ret i8 %0
+} + +declare dso_local spir_func i8 @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELi(i4) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf8_vector]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1VecTy]] [[#Cast1:]] [[#Int4VecConst]] +; CHECK-SPIRV: FConvert [[#HFloat8VecTy]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8VecTy]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf8_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELDv8_i(<8 x i4> splat (i4 1)) +; CHECK-LLVM: ret <8 x i8> %[[#Call]] + +define spir_func <8 x i8> @fp4e2m1_hf8_vector() { +entry: + %0 = call <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELDv8_i(<8 x i4> ) + ret <8 x i8> %0 +} + +declare dso_local spir_func <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE4M3INTELDv8_i(<8 x i4>) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_bf8_scalar]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Ty]] [[#Cast1:]] [[#Int4Const]] +; CHECK-SPIRV: FConvert [[#BFloat8Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_bf8_scalar +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELi(i4 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @fp4e2m1_bf8_scalar() { +entry: + %0 = call i8 @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELi(i4 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELi(i4) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_bf8_vector]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1VecTy]] [[#Cast1:]] [[#Int4VecConst]] +; CHECK-SPIRV: FConvert [[#BFloat8VecTy]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: Bitcast [[#Int8VecTy]] [[#Cast2:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast2]] + +; CHECK-LLVM-LABEL: fp4e2m1_bf8_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELDv8_i(<8 x i4> splat (i4 1)) +; CHECK-LLVM: ret <8 x i8> 
%[[#Call]] + +define spir_func <8 x i8> @fp4e2m1_bf8_vector() { +entry: + %0 = call <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELDv8_i(<8 x i4> ) + ret <8 x i8> %0 +} + +declare dso_local spir_func <8 x i8> @_Z38__builtin_spirv_ConvertE2M1ToE5M2INTELDv8_i(<8 x i4>) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf16_scalar]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Ty]] [[#Cast1:]] [[#Int4Const]] +; CHECK-SPIRV: FConvert [[#HFloat16Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf16_scalar +; CHECK-LLVM: %[[#Call:]] = call half @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELi(i4 1) +; CHECK-LLVM: ret half %[[#Call]] + +define spir_func half @fp4e2m1_hf16_scalar() { +entry: + %0 = call half @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELi(i4 1) + ret half %0 +} + +declare dso_local spir_func half @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELi(i4) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_hf16_vector]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1VecTy]] [[#Cast1:]] [[#Int4VecConst]] +; CHECK-SPIRV: FConvert [[#HFloat16VecTy]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: fp4e2m1_hf16_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv8_i(<8 x i4> splat (i4 1)) +; CHECK-LLVM: ret <8 x half> %[[#Call]] + +define spir_func <8 x half> @fp4e2m1_hf16_vector() { +entry: + %0 = call <8 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv8_i(<8 x i4> ) + ret <8 x half> %0 +} + +declare dso_local spir_func <8 x half> @_Z38__builtin_spirv_ConvertE2M1ToFP16INTELDv8_i(<8 x i4>) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_bf16_scalar]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1Ty]] [[#Cast1:]] [[#Int4Const]] +; CHECK-SPIRV: FConvert [[#BFloat16Ty]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: fp4e2m1_bf16_scalar +; CHECK-LLVM: %[[#Call:]] = call bfloat @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELi(i4 1) +; CHECK-LLVM: ret bfloat 
%[[#Call]] + +define spir_func bfloat @fp4e2m1_bf16_scalar() { +entry: + %0 = call bfloat @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELi(i4 1) + ret bfloat %0 +} + +declare dso_local spir_func bfloat @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELi(i4) + +; CHECK-SPIRV: Function [[#]] [[#fp4e2m1_bf16_vector]] [[#]] +; CHECK-SPIRV: Bitcast [[#E2M1VecTy]] [[#Cast1:]] [[#Int4VecConst]] +; CHECK-SPIRV: FConvert [[#BFloat16VecTy]] [[#Conv:]] [[#Cast1]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: fp4e2m1_bf16_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x bfloat> @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELDv8_i(<8 x i4> splat (i4 1)) +; CHECK-LLVM: ret <8 x bfloat> %[[#Call]] + +define spir_func <8 x bfloat> @fp4e2m1_bf16_vector() { +entry: + %0 = call <8 x bfloat> @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELDv8_i(<8 x i4> ) + ret <8 x bfloat> %0 +} + +declare dso_local spir_func <8 x bfloat> @_Z38__builtin_spirv_ConvertE2M1ToBF16INTELDv8_i(<8 x i4>) + +; Following tests are for 4-bit roundings + +; CHECK-SPIRV: Function [[#]] [[#hf16_fp4e2m1_scalar]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1Ty]] [[#Conv:]] [[#HalfConst]] +; CHECK-SPIRV: Bitcast [[#Int4Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_fp4e2m1_scalar +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDh(half 0xH3C00) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @hf16_fp4e2m1_scalar() { +entry: + %0 = call i4 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDh(half 1.0) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDh(half) + +; CHECK-SPIRV: Function [[#]] [[#hf16_fp4e2m1_vector]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1VecTy]] [[#Conv:]] [[#HalfVecConst]] +; CHECK-SPIRV: Bitcast [[#Int4VecTy]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_fp4e2m1_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x i4> 
@_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half> splat (half 0xH3C00)) +; CHECK-LLVM: ret <8 x i4> %[[#Call]] + +define spir_func <8 x i4> @hf16_fp4e2m1_vector() { +entry: + %0 = call <8 x i4> @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half> ) + ret <8 x i4> %0 +} + +declare dso_local spir_func <8 x i4> @_Z38__builtin_spirv_ConvertFP16ToE2M1INTELDv8_Dh(<8 x half>) + +; CHECK-SPIRV: Function [[#]] [[#bf16_fp4e2m1_scalar]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1Ty]] [[#Conv:]] [[#BfloatConst]] +; CHECK-SPIRV: Bitcast [[#Int4Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_fp4e2m1_scalar +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDF16b(bfloat 0xR3F80) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @bf16_fp4e2m1_scalar() { +entry: + %0 = call i4 @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDF16b(bfloat 1.0) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDF16b(bfloat) + +; CHECK-SPIRV: Function [[#]] [[#bf16_fp4e2m1_vector]] [[#]] +; CHECK-SPIRV: FConvert [[#E2M1VecTy]] [[#Conv:]] [[#BfloatVecConst]] +; CHECK-SPIRV: Bitcast [[#Int4VecTy]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_fp4e2m1_vector +; CHECK-LLVM: %[[#Call:]] = call <8 x i4> @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDv8_DF16b(<8 x bfloat> splat (bfloat 0xR3F80)) +; CHECK-LLVM: ret <8 x i4> %[[#Call]] + +define spir_func <8 x i4> @bf16_fp4e2m1_vector() { +entry: + %0 = call <8 x i4> @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDv8_DF16b(<8 x bfloat> ) + ret <8 x i4> %0 +} + +declare dso_local spir_func <8 x i4> @_Z38__builtin_spirv_ConvertBF16ToE2M1INTELDv8_DF16b(<8 x bfloat>) + diff --git a/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions.ll b/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions.ll new file mode 100644 index 0000000000..5de60ef332 --- /dev/null +++ 
b/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions.ll @@ -0,0 +1,356 @@ +; This test checks if FP4 and FP8 conversions specified by +; __builtin_spirv_* external function calls are translated correctly. +; This test is for Clamp*, Stochastic*, ClampStochastic* conversions. +; Packed and vector conversions are tested for the general case; this test is +; only for scalars. + +; RUN: llvm-spirv %s -o %t.spv --spirv-ext=+SPV_EXT_float8,+SPV_INTEL_float4,+SPV_INTEL_int4,+SPV_KHR_bfloat16,+SPV_INTEL_fp_conversions +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability Int4TypeINTEL +; CHECK-SPIRV-DAG: Capability Float8EXT +; CHECK-SPIRV-DAG: Capability FloatConversionsINTEL + +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_int4" +; CHECK-SPIRV-DAG: Extension "SPV_EXT_float8" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_float4" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_fp_conversions" + +; CHECK-SPIRV-DAG: Name [[#hf16_hf8_clamp:]] "hf16_hf8_clamp" +; CHECK-SPIRV-DAG: Name [[#hf16_bf8_clamp:]] "hf16_bf8_clamp" +; CHECK-SPIRV-DAG: Name [[#bf16_hf8_clamp:]] "bf16_hf8_clamp" +; CHECK-SPIRV-DAG: Name [[#bf16_bf8_clamp:]] "bf16_bf8_clamp" + +; CHECK-SPIRV-DAG: Name [[#hf16_bf8_stochastic:]] "hf16_bf8_stochastic" +; CHECK-SPIRV-DAG: Name [[#hf16_hf8_stochastic:]] "hf16_hf8_stochastic" +; CHECK-SPIRV-DAG: Name [[#bf16_bf8_stochastic:]] "bf16_bf8_stochastic" +; CHECK-SPIRV-DAG: Name [[#bf16_hf8_stochastic:]] "bf16_hf8_stochastic" +; CHECK-SPIRV-DAG: Name [[#hf16_fp4e2m1_stochastic:]] "hf16_fp4e2m1_stochastic" +; CHECK-SPIRV-DAG: Name [[#bf16_fp4e2m1_stochastic:]] "bf16_fp4e2m1_stochastic" +; CHECK-SPIRV-DAG: Name [[#hf16_int4_stochastic:]] "hf16_int4_stochastic" +; CHECK-SPIRV-DAG: Name [[#bf16_int4_stochastic:]] "bf16_int4_stochastic" +; CHECK-SPIRV-DAG: Name
[[#hf16_bf8_clamp_stochastic:]] "hf16_bf8_clamp_stochastic" +; CHECK-SPIRV-DAG: Name [[#bf16_bf8_clamp_stochastic:]] "bf16_bf8_clamp_stochastic" + +; CHECK-SPIRV-DAG: Name [[#hf16_bf8_stochastic_last_seed:]] "hf16_bf8_stochastic_last_seed" +; CHECK-SPIRV-DAG: Name [[#hf16_int4_stochastic_last_seed:]] "hf16_int4_stochastic_last_seed" +; CHECK-SPIRV-DAG: Name [[#hf16_bf8_clamp_stochastic_last_seed:]] "hf16_bf8_clamp_stochastic_last_seed" + +; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 +; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Int32Const:]] 1 +; CHECK-SPIRV-DAG: TypeInt [[#Int4Ty:]] 4 0 + +; CHECK-SPIRV-DAG: TypeFloat [[#E2M1Ty:]] 4 6214 +; CHECK-SPIRV-DAG: TypeFloat [[#HFloat8Ty:]] 8 4214 +; CHECK-SPIRV-DAG: TypeFloat [[#BFloat8Ty:]] 8 4215 + +; CHECK-SPIRV-DAG: TypeFloat [[#HFloat16Ty:]] 16 {{$}} +; CHECK-SPIRV-DAG: Constant [[#HFloat16Ty]] [[#HalfConst:]] 15360 + +; CHECK-SPIRV-DAG: TypeFloat [[#BFloat16Ty:]] 16 0 +; CHECK-SPIRV-DAG: Constant [[#BFloat16Ty]] [[#BfloatConst:]] 16256 + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; The following tests are for clamp rounding + +; CHECK-SPIRV: Function [[#]] [[#hf16_hf8_clamp]] [[#]] +; CHECK-SPIRV: ClampConvertFToFINTEL [[#HFloat8Ty]] [[#Conv:]] [[#HalfConst]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_hf8_clamp +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z43__builtin_spirv_ClampConvertFP16ToE4M3INTELDh(half 0xH3C00) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_hf8_clamp() { +entry: + %0 = call i8 @_Z43__builtin_spirv_ClampConvertFP16ToE4M3INTELDh(half 1.0) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z43__builtin_spirv_ClampConvertFP16ToE4M3INTELDh(half) + +; CHECK-SPIRV: Function [[#]] [[#hf16_bf8_clamp]] [[#]] +; CHECK-SPIRV: ClampConvertFToFINTEL [[#BFloat8Ty]]
[[#Conv:]] [[#HalfConst]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_bf8_clamp +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z43__builtin_spirv_ClampConvertFP16ToE5M2INTELDh(half 0xH3C00) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_bf8_clamp() { +entry: + %0 = call i8 @_Z43__builtin_spirv_ClampConvertFP16ToE5M2INTELDh(half 1.0) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z43__builtin_spirv_ClampConvertFP16ToE5M2INTELDh(half) + +; CHECK-SPIRV: Function [[#]] [[#bf16_hf8_clamp]] [[#]] +; CHECK-SPIRV: ClampConvertFToFINTEL [[#HFloat8Ty]] [[#Conv:]] [[#BfloatConst]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_hf8_clamp +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z43__builtin_spirv_ClampConvertBF16ToE4M3INTELDF16b(bfloat 0xR3F80) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @bf16_hf8_clamp() { +entry: + %0 = call i8 @_Z43__builtin_spirv_ClampConvertBF16ToE4M3INTELDF16b(bfloat 1.0) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z43__builtin_spirv_ClampConvertBF16ToE4M3INTELDF16b(bfloat) + +; CHECK-SPIRV: Function [[#]] [[#bf16_bf8_clamp]] [[#]] +; CHECK-SPIRV: ClampConvertFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#BfloatConst]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_bf8_clamp +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z43__builtin_spirv_ClampConvertBF16ToE5M2INTELDF16b(bfloat 0xR3F80) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @bf16_bf8_clamp() { +entry: + %0 = call i8 @_Z43__builtin_spirv_ClampConvertBF16ToE5M2INTELDF16b(bfloat 1.0) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z43__builtin_spirv_ClampConvertBF16ToE5M2INTELDF16b(bfloat) + +; CHECK-SPIRV: Function [[#]] [[#hf16_bf8_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#HalfConst]] 
[[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_bf8_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhi(half 0xH3C00, i32 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_bf8_stochastic() { +entry: + %0 = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhi(half 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhi(half, i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_hf8_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#HFloat8Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_hf8_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE4M3INTELDhi(half 0xH3C00, i32 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_hf8_stochastic() { +entry: + %0 = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE4M3INTELDhi(half 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE4M3INTELDhi(half, i32) + +; CHECK-SPIRV: Function [[#]] [[#bf16_bf8_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#BfloatConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_bf8_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE5M2INTELDF16bi(bfloat 0xR3F80, i32 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @bf16_bf8_stochastic() { +entry: + %0 = call i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE5M2INTELDF16bi(bfloat 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE5M2INTELDF16bi(bfloat, i32) + +; 
CHECK-SPIRV: Function [[#]] [[#bf16_hf8_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#HFloat8Ty]] [[#Conv:]] [[#BfloatConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_hf8_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE4M3INTELDF16bi(bfloat 0xR3F80, i32 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @bf16_hf8_stochastic() { +entry: + %0 = call i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE4M3INTELDF16bi(bfloat 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z46__builtin_spirv_StochasticRoundBF16ToE4M3INTELDF16bi(bfloat, i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_fp4e2m1_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#E2M1Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int4Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_fp4e2m1_stochastic +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z46__builtin_spirv_StochasticRoundFP16ToE2M1INTELDhi(half 0xH3C00, i32 1) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @hf16_fp4e2m1_stochastic() { +entry: + %0 = call i4 @_Z46__builtin_spirv_StochasticRoundFP16ToE2M1INTELDhi(half 1.0, i32 1) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z46__builtin_spirv_StochasticRoundFP16ToE2M1INTELDhi(half, i32) + +; CHECK-SPIRV: Function [[#]] [[#bf16_fp4e2m1_stochastic]] [[#]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#E2M1Ty]] [[#Conv:]] [[#BfloatConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int4Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_fp4e2m1_stochastic +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z46__builtin_spirv_StochasticRoundBF16ToE2M1INTELDF16bi(bfloat 0xR3F80, i32 1) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @bf16_fp4e2m1_stochastic() { +entry: + %0 = call i4 
@_Z46__builtin_spirv_StochasticRoundBF16ToE2M1INTELDF16bi(bfloat 1.0, i32 1) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z46__builtin_spirv_StochasticRoundBF16ToE2M1INTELDF16bi(bfloat, i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_int4_stochastic]] [[#]] +; CHECK-SPIRV: ClampStochasticRoundFToSINTEL [[#Int4Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: hf16_int4_stochastic +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELDhi(half 0xH3C00, i32 1) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @hf16_int4_stochastic() { +entry: + %0 = call i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELhs(half 1.0, i32 1) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELhs(half, i32) + +; CHECK-SPIRV: Function [[#]] [[#bf16_int4_stochastic]] [[#]] +; CHECK-SPIRV: ClampStochasticRoundFToSINTEL [[#Int4Ty]] [[#Conv:]] [[#BfloatConst]] [[#Int32Const]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: bf16_int4_stochastic +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToInt4INTELDF16bi(bfloat 0xR3F80, i32 1) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @bf16_int4_stochastic() { +entry: + %0 = call i4 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToInt4INTELDF16bi(bfloat 1.0, i32 1) + ret i4 %0 +} + +declare dso_local spir_func i4 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToInt4INTELDF16bi(bfloat, i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_bf8_clamp_stochastic]] [[#]] +; CHECK-SPIRV: ClampStochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_bf8_clamp_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhi(half 0xH3C00, i32 1) +; CHECK-LLVM: ret 
i8 %[[#Call]] + +define spir_func i8 @hf16_bf8_clamp_stochastic() { +entry: + %0 = call i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhi(half 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhi(half, i32) + +; CHECK-SPIRV: Function [[#]] [[#bf16_bf8_clamp_stochastic]] [[#]] +; CHECK-SPIRV: ClampStochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#BfloatConst]] [[#Int32Const]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: bf16_bf8_clamp_stochastic +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToE5M2INTELDF16bi(bfloat 0xR3F80, i32 1) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @bf16_bf8_clamp_stochastic() { +entry: + %0 = call i8 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToE5M2INTELDF16bi(bfloat 1.0, i32 1) + ret i8 %0 +} + +declare dso_local spir_func i8 @_Z51__builtin_spirv_ClampStochasticRoundBF16ToE5M2INTELDF16bi(bfloat, i32) + +; CHECK-SPIRV: Function [[#]] [[#hf16_bf8_stochastic_last_seed]] [[#]] +; CHECK-SPIRV: Variable [[#]] [[#Ptr:]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] [[#Ptr]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_bf8_stochastic_last_seed +; CHECK-LLVM: %[[#Ptr:]] = alloca i32 +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhiPi(half 0xH3C00, i32 1, ptr %[[#Ptr]]) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_bf8_stochastic_last_seed() { +entry: + %0 = alloca i32 + %1 = call i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhiPi(half 1.0, i32 1, ptr %0) + ret i8 %1 +} + +declare dso_local spir_func i8 @_Z46__builtin_spirv_StochasticRoundFP16ToE5M2INTELDhiPi(half, i32, ptr) + +; CHECK-SPIRV: Function [[#]] [[#hf16_int4_stochastic_last_seed]] [[#]] +; 
CHECK-SPIRV: Variable [[#]] [[#Ptr:]] +; CHECK-SPIRV: ClampStochasticRoundFToSINTEL [[#Int4Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] [[#Ptr]] +; CHECK-SPIRV: ReturnValue [[#Conv]] + +; CHECK-LLVM-LABEL: hf16_int4_stochastic_last_seed +; CHECK-LLVM: %[[#Ptr:]] = alloca i32 +; CHECK-LLVM: %[[#Call:]] = call i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELDhiPi(half 0xH3C00, i32 1, ptr %[[#Ptr]]) +; CHECK-LLVM: ret i4 %[[#Call]] + +define spir_func i4 @hf16_int4_stochastic_last_seed() { +entry: + %0 = alloca i32 + %1 = call i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELhiPi(half 1.0, i32 1, ptr %0) + ret i4 %1 +} + +declare dso_local spir_func i4 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToInt4INTELhiPi(half, i32, ptr) + +; CHECK-SPIRV: Function [[#]] [[#hf16_bf8_clamp_stochastic_last_seed]] [[#]] +; CHECK-SPIRV: Variable [[#]] [[#Ptr:]] +; CHECK-SPIRV: ClampStochasticRoundFToFINTEL [[#BFloat8Ty]] [[#Conv:]] [[#HalfConst]] [[#Int32Const]] [[#Ptr]] +; CHECK-SPIRV: Bitcast [[#Int8Ty]] [[#Cast:]] [[#Conv]] +; CHECK-SPIRV: ReturnValue [[#Cast]] + +; CHECK-LLVM-LABEL: hf16_bf8_clamp_stochastic_last_seed +; CHECK-LLVM: %[[#Ptr:]] = alloca i32 +; CHECK-LLVM: %[[#Call:]] = call i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhiPi(half 0xH3C00, i32 1, ptr %[[#Ptr]]) +; CHECK-LLVM: ret i8 %[[#Call]] + +define spir_func i8 @hf16_bf8_clamp_stochastic_last_seed() { +entry: + %0 = alloca i32 + %1 = call i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhiPi(half 1.0, i32 1, ptr %0) + ret i8 %1 +} + +declare dso_local spir_func i8 @_Z51__builtin_spirv_ClampStochasticRoundFP16ToE5M2INTELDhiPi(half, i32, ptr) diff --git a/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions_spv_ir.ll b/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions_spv_ir.ll new file mode 100644 index 0000000000..053b659803 --- /dev/null +++ b/test/extensions/SPV_INTEL_fp_conversions/spv_intel_fp_conversions_spv_ir.ll @@ -0,0 +1,47 
@@ +; Test for conversions, that don't require special type interpretation. + +; RUN: llvm-spirv %s -o %t.spv --spirv-ext=+SPV_INTEL_fp_conversions +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r --spirv-target-env=SPV-IR +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability FloatConversionsINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_fp_conversions" +; CHECK-SPIRV-DAG: TypeFloat [[#HalfTy:]] 16 +; CHECK-SPIRV-DAG: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV-DAG: TypeInt [[#ShortTy:]] 16 0 +; CHECK-SPIRV-DAG: Constant [[#ShortTy]] [[#IntConst:]] 4 +; CHECK-SPIRV-DAG: Constant [[#FloatTy]] [[#FPConst:]] 1065353216 + +; CHECK-SPIRV: ClampConvertFToFINTEL [[#HalfTy]] [[#]] [[#FPConst]] +; CHECK-SPIRV: StochasticRoundFToFINTEL [[#HalfTy]] [[#]] [[#FPConst]] [[#IntConst]] +; CHECK-SPIRV: ClampStochasticRoundFToFINTEL [[#HalfTy]] [[#]] [[#FPConst]] [[#IntConst]] +; CHECK-SPIRV: ClampStochasticRoundFToFINTEL [[#HalfTy]] [[#]] [[#FPConst]] [[#IntConst]] [[#]] + +; CHECK-LLVM: call spir_func half @_Z35__spirv_ClampConvertFToFINTEL_Rhalff(float 1.000000e+00) +; CHECK-LLVM: call spir_func half @_Z38__spirv_StochasticRoundFToFINTEL_Rhalffs(float 1.000000e+00, i16 4) +; CHECK-LLVM: call spir_func half @_Z43__spirv_ClampStochasticRoundFToFINTEL_Rhalffs(float 1.000000e+00, i16 4) +; CHECK-LLVM: call spir_func half @_Z43__spirv_ClampStochasticRoundFToFINTEL_RhalffsPs(float 1.000000e+00, i16 4, ptr null) + +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; Function Attrs: nounwind readnone +define spir_func void @foo() { +entry: + %0 = call spir_func half @_Z29__spirv_ClampConvertFToFINTELf(float 1.0) + %2 = call spir_func half @_Z32__spirv_StochasticRoundFToFINTELfs(float 1.0, i16 4) + %3 = call 
spir_func half @_Z37__spirv_ClampStochasticRoundFToFINTELfs(float 1.0, i16 4) + %5 = call spir_func half @_Z37__spirv_ClampStochasticRoundFToFINTELfsPs(float 1.0, i16 4, ptr null) + ret void +} + +declare dso_local spir_func half @_Z29__spirv_ClampConvertFToFINTELf(float) + +declare dso_local spir_func half @_Z32__spirv_StochasticRoundFToFINTELfs(float, i16) + +declare dso_local spir_func half @_Z37__spirv_ClampStochasticRoundFToFINTELfs(float, i16) + +declare dso_local spir_func half @_Z37__spirv_ClampStochasticRoundFToFINTELfsPs(float, i16, ptr)