diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp index aadfb0ceae..a403975eb8 100644 --- a/lib/SPIRV/OCLUtil.cpp +++ b/lib/SPIRV/OCLUtil.cpp @@ -898,6 +898,7 @@ SPIRAddressSpace getOCLOpaqueTypeAddrSpace(Op OpCode) { case OpTypeSampler: return SPIRV_SAMPLER_T_ADDR_SPACE; case internal::OpTypeJointMatrixINTEL: + case internal::OpTypeJointMatrixINTELv2: case OpTypeCooperativeMatrixKHR: return SPIRAS_Global; default: diff --git a/lib/SPIRV/SPIRVInternal.h b/lib/SPIRV/SPIRVInternal.h index ced8c147a6..b00938255e 100644 --- a/lib/SPIRV/SPIRVInternal.h +++ b/lib/SPIRV/SPIRVInternal.h @@ -318,6 +318,7 @@ const static char PipeStorage[] = "PipeStorage"; const static char ConstantPipeStorage[] = "ConstantPipeStorage"; const static char VmeImageINTEL[] = "VmeImageINTEL"; const static char JointMatrixINTEL[] = "JointMatrixINTEL"; +const static char BufferSurfaceINTEL[] = "BufferSurfaceINTEL"; const static char CooperativeMatrixKHR[] = "CooperativeMatrixKHR"; } // namespace kSPIRVTypeName @@ -976,6 +977,7 @@ template <> inline void SPIRVMap::init() { _SPIRV_OP(AvcRefResultINTEL) _SPIRV_OP(AvcSicResultINTEL) _SPIRV_OP(VmeImageINTEL) + _SPIRV_OP(BufferSurfaceINTEL) _SPIRV_OP(CooperativeMatrixKHR) #undef _SPIRV_OP add("JointMatrixINTEL", internal::OpTypeJointMatrixINTEL); diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 581f5ee192..0a64d9647c 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -472,15 +472,26 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { auto *MT = static_cast(T); auto R = static_cast(MT->getRows())->getZExtIntValue(); auto C = static_cast(MT->getColumns())->getZExtIntValue(); - auto L = static_cast(MT->getLayout())->getZExtIntValue(); - auto S = static_cast(MT->getScope())->getZExtIntValue(); - SmallVector Params = {(unsigned)R, (unsigned)C, (unsigned)L, - (unsigned)S}; + std::vector Params = {(unsigned)R, (unsigned)C}; + if (auto *Layout = MT->getLayout()) + 
Params.push_back(static_cast(Layout)->getZExtIntValue()); + Params.push_back( + static_cast(MT->getScope())->getZExtIntValue()); if (auto *Use = MT->getUse()) Params.push_back(static_cast(Use)->getZExtIntValue()); - return mapType(T, getSPIRVType(internal::OpTypeJointMatrixINTEL, - transTypeToOCLTypeName(MT->getCompType()), - Params, !UseTPT)); + auto *CTI = MT->getComponentTypeInterpretation(); + if (!CTI) + return mapType( + T, llvm::TargetExtType::get(*Context, "spirv.JointMatrixINTEL", + transType(MT->getCompType()), Params)); + const unsigned CTIValue = + static_cast(CTI)->getZExtIntValue(); + assert(CTIValue <= internal::InternalJointMatrixCTI::PackedInt4 && + "Unknown matrix component type interpretation"); + Params.push_back(CTIValue); + return mapType( + T, llvm::TargetExtType::get(*Context, "spirv.JointMatrixINTEL", + transType(MT->getCompType()), Params)); } case OpTypeCooperativeMatrixKHR: { auto *MT = static_cast(T); diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index ac3d805d4d..268d356bc4 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -649,21 +649,6 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(Type *ET, unsigned AddrSpc) { transType(ET))); } } else { - // JointMatrixINTEL type is not necessarily an opaque type, it can be - // represented as a structure with pointer to a multidimensional array - // member. 
- if (ST && ST->hasName()) { - StringRef STName = ST->getName(); - if (STName.startswith(kSPIRVTypeName::PrefixAndDelim)) { - SmallVector Postfixes; - auto TN = decodeSPIRVTypeName(STName, Postfixes); - if (TN == kSPIRVTypeName::JointMatrixINTEL) { - SPIRVType *TranslatedTy = transSPIRVJointMatrixINTELType(Postfixes); - PointeeTypeMap[TypeKey] = TranslatedTy; - return TranslatedTy; - } - } - } SPIRVType *ElementType = transType(ET); // ET, as a recursive type, may contain exactly the same pointer T, so it // may happen that after translation of ET we already have translated T, @@ -698,56 +683,6 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(SPIRVType *ET, unsigned AddrSpc) { return TranslatedTy; } -// Representation in LLVM IR before the translator is a pointer to an opaque -// structure: -// %spirv.JointMatrixINTEL._%element_type%_%rows%_%cols%_%scope%_%use% -// Here we check the structure name yet again. Another option would be to -// check SPIR-V friendly function calls (by their name) and obtain return -// or their parameter types, assuming, that the appropriate types are Matrix -// structure type. But in the near future, we will reuse Composite -// instructions to do, for example, matrix initialization directly on AMX -// register by OpCompositeConstruct. And we can't claim, that the Result type -// of OpCompositeConstruct instruction is always the joint matrix type, it's -// simply not true. 
-SPIRVType *LLVMToSPIRVBase::transSPIRVJointMatrixINTELType( - SmallVector Postfixes) { - Type *ElemTy = nullptr; - StringRef Ty{Postfixes[0]}; - auto NumBits = llvm::StringSwitch(Ty) - .Case("char", 8) - .Case("short", 16) - .Case("int", 32) - .Case("long", 64) - .Default(0); - if (NumBits) - ElemTy = IntegerType::get(M->getContext(), NumBits); - else if (Ty == "half") - ElemTy = Type::getHalfTy(M->getContext()); - else if (Ty == "float") - ElemTy = Type::getFloatTy(M->getContext()); - else if (Ty == "double") - ElemTy = Type::getDoubleTy(M->getContext()); - else if (Ty == "bfloat16") - ElemTy = Type::getInt16Ty(M->getContext()); - else - llvm_unreachable("Unexpected type for matrix!"); - - auto ParseInteger = [this](StringRef Postfix) -> ConstantInt * { - unsigned long long N = 0; - if (consumeUnsignedInteger(Postfix, 10, N)) { - BM->getErrorLog().checkError( - false, SPIRVEC_InvalidLlvmModule, - "TypeJointMatrixINTEL expects integer parameters"); - return 0; - } - return getUInt32(M, N); - }; - std::vector Args; - for (size_t I = 1; I != Postfixes.size(); ++I) - Args.emplace_back(transConstant(ParseInteger(Postfixes[I]))); - return BM->addJointMatrixINTELType(transType(ElemTy), Args); -} - SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, unsigned AddrSpace) { std::pair Key = {STName, AddrSpace}; @@ -804,9 +739,7 @@ SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, return SaveType(BM->addQueueType()); else if (TN == kSPIRVTypeName::PipeStorage) return SaveType(BM->addPipeStorageType()); - else if (TN == kSPIRVTypeName::JointMatrixINTEL) { - return SaveType(transSPIRVJointMatrixINTELType(Postfixes)); - } else + else return SaveType( BM->addOpaqueGenericType(SPIRVOpaqueTypeOpCodeMap::map(TN))); } diff --git a/lib/SPIRV/libSPIRV/SPIRVEntry.cpp b/lib/SPIRV/libSPIRV/SPIRVEntry.cpp index 16a918b18e..b860eb9d6e 100644 --- a/lib/SPIRV/libSPIRV/SPIRVEntry.cpp +++ b/lib/SPIRV/libSPIRV/SPIRVEntry.cpp @@ -84,6 +84,10 @@ SPIRVEntry 
*SPIRVEntry::create(Op OpCode) { static const OpToFactoryMapTy OpToFactoryMap(std::begin(Table), std::end(Table)); + // TODO: To remove this when we make a switch to new version + if (OpCode == internal::OpTypeJointMatrixINTELv2) + OpCode = internal::OpTypeJointMatrixINTEL; + OpToFactoryMapTy::const_iterator Loc = OpToFactoryMap.find(OpCode); if (Loc != OpToFactoryMap.end()) return Loc->second(); diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h index c691c4e4f4..8bbb3b7bb0 100644 --- a/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -207,10 +207,20 @@ template <> inline void SPIRVMap::init() { {CapabilitySubgroupAvcMotionEstimationIntraINTEL}); ADD_VEC_INIT(internal::CapabilityJointMatrixWIInstructionsINTEL, {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixTF32ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixBF16ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixPackedInt2ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixPackedInt4ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); ADD_VEC_INIT(internal::CapabilityCooperativeMatrixCheckedInstructionsINTEL, {CapabilityCooperativeMatrixKHR}); ADD_VEC_INIT(internal::CapabilityCooperativeMatrixPrefetchINTEL, {CapabilityCooperativeMatrixKHR}); + ADD_VEC_INIT(internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL, + {CapabilityCooperativeMatrixKHR}); } template <> inline void SPIRVMap::init() { diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index c6fc1bd678..82c7f33f19 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -1991,6 +1991,7 @@ class SPIRVCompositeConstruct : public SPIRVInstruction { case OpTypeArray: case OpTypeStruct: case 
internal::OpTypeJointMatrixINTEL: + case internal::OpTypeJointMatrixINTELv2: case OpTypeCooperativeMatrixKHR: break; default: @@ -3406,10 +3407,17 @@ template class SPIRVBfloat16ConversionINTELInstBase : public SPIRVUnaryInst { protected: SPIRVCapVec getRequiredCapability() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + return getVec(internal::CapabilityBfloat16ConversionINTEL, + internal::CapabilityJointMatrixBF16ComponentTypeINTEL); return getVec(internal::CapabilityBfloat16ConversionINTEL); } std::optional getRequiredExtension() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + this->getModule()->addExtension(ExtensionID::SPV_INTEL_joint_matrix); return ExtensionID::SPV_INTEL_bfloat16_conversion; } @@ -3438,8 +3446,25 @@ class SPIRVBfloat16ConversionINTELInstBase : public SPIRVUnaryInst { } auto InstName = OpCodeNameMap::map(OC); - SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + auto *Module = this->getModule(); + SPIRVErrorLog &SPVErrLog = Module->getErrorLog(); + // Cooperative matrix type is allowed as input/output of the instruction + // if SPV_INTEL_joint_matrix is enabled + if (ResCompTy->isTypeCooperativeMatrixKHR()) { + SPVErrLog.checkError( + Module->isAllowedToUseExtension(ExtensionID::SPV_INTEL_joint_matrix), + SPIRVEC_InvalidInstruction, + InstName + "\nCan be used with " + "cooperative matrices only when SPV_INTEL_joint_matrix is " + "enabled\n"); + assert(InCompTy->isTypeCooperativeMatrixKHR() && + "Input must also be a cooperative matrix"); + ResCompTy = static_cast(ResCompTy) + ->getCompType(); + InCompTy = + static_cast(InCompTy)->getCompType(); + } if (OC == internal::OpConvertFToBF16INTEL) { SPVErrLog.checkError( ResCompTy->isTypeInt(16), SPIRVEC_InvalidInstruction, @@ -3492,10 +3517,10 @@ class SPIRVJointMatrixINTELInst : public SPIRVJointMatrixINTELInstBase { SPIRV##x##INTEL; _SPIRV_OP(JointMatrixLoad, true, 
6, true) _SPIRV_OP(JointMatrixStore, false, 5, true) -_SPIRV_OP(JointMatrixMad, true, 7) -_SPIRV_OP(JointMatrixSUMad, true, 7) -_SPIRV_OP(JointMatrixUSMad, true, 7) -_SPIRV_OP(JointMatrixUUMad, true, 7) +_SPIRV_OP(JointMatrixMad, true, 6, true) +_SPIRV_OP(JointMatrixSUMad, true, 6, true) +_SPIRV_OP(JointMatrixUSMad, true, 6, true) +_SPIRV_OP(JointMatrixUUMad, true, 6, true) // TODO: move to SPIRVJointMatrixINTELWorkItemInst _SPIRV_OP(JointMatrixWorkItemLength, true, 4) #undef _SPIRV_OP @@ -3529,7 +3554,27 @@ class SPIRVCooperativeMatrixPrefetchINTELInstBase typedef SPIRVInstTemplate \ SPIRV##x##INTEL; -_SPIRV_OP(CooperativeMatrixPrefetch, false, 8, true, 5) +_SPIRV_OP(CooperativeMatrixPrefetch, false, 6, true, 3) +#undef _SPIRV_OP + +class SPIRVCooperativeMatrixInvocationInstructionsINTELInstBase + : public SPIRVInstTemplateBase { +protected: + std::optional getRequiredExtension() const override { + return ExtensionID::SPV_INTEL_joint_matrix; + } + SPIRVCapVec getRequiredCapability() const override { + return getVec( + internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL); + } +}; + +#define _SPIRV_OP(x, ...) 
\ + typedef SPIRVInstTemplate< \ + SPIRVCooperativeMatrixInvocationInstructionsINTELInstBase, \ + internal::Op##x##INTEL, __VA_ARGS__> \ + SPIRV##x##INTEL; +_SPIRV_OP(CooperativeMatrixApplyFunction, true, 5) #undef _SPIRV_OP class SPIRVCooperativeMatrixKHRInstBase : public SPIRVInstTemplateBase { @@ -3813,10 +3858,17 @@ template class SPIRVTensorFloat32RoundingINTELInstBase : public SPIRVUnaryInst { protected: SPIRVCapVec getRequiredCapability() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + return getVec(internal::CapabilityTensorFloat32RoundingINTEL, + internal::CapabilityJointMatrixTF32ComponentTypeINTEL); return getVec(internal::CapabilityTensorFloat32RoundingINTEL); } std::optional getRequiredExtension() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + this->getModule()->addExtension(ExtensionID::SPV_INTEL_joint_matrix); return ExtensionID::SPV_INTEL_tensor_float32_conversion; } @@ -3845,7 +3897,25 @@ class SPIRVTensorFloat32RoundingINTELInstBase : public SPIRVUnaryInst { } auto InstName = OpCodeNameMap::map(OC); - SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + auto *Module = this->getModule(); + SPIRVErrorLog &SPVErrLog = Module->getErrorLog(); + + // Cooperative matrix type is allowed as input/output of the instruction + // if SPV_INTEL_joint_matrix is enabled + if (ResCompTy->isTypeCooperativeMatrixKHR()) { + SPVErrLog.checkError( + Module->isAllowedToUseExtension(ExtensionID::SPV_INTEL_joint_matrix), + SPIRVEC_InvalidInstruction, + InstName + "\nCan be used with " + "cooperative matrices only when SPV_INTEL_joint_matrix is " + "enabled\n"); + assert(InCompTy->isTypeCooperativeMatrixKHR() && + "Input must also be a cooperative matrix"); + ResCompTy = static_cast(ResCompTy) + ->getCompType(); + InCompTy = + static_cast(InCompTy)->getCompType(); + } SPVErrLog.checkError( ResCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, 
diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index a0f1e4e1e2..b37434796f 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -652,8 +652,18 @@ template <> inline void SPIRVMap::init() { add(internal::CapabilityCacheControlsINTEL, "CacheControlsINTEL"); add(internal::CapabilityJointMatrixWIInstructionsINTEL, "JointMatrixWIInstructionsINTEL"); + add(internal::CapabilityJointMatrixTF32ComponentTypeINTEL, + "JointMatrixTF32ComponentTypeINTEL"); + add(internal::CapabilityJointMatrixBF16ComponentTypeINTEL, + "JointMatrixBF16ComponentTypeINTEL"); + add(internal::CapabilityJointMatrixPackedInt2ComponentTypeINTEL, + "JointMatrixPackedInt2ComponentTypeINTEL"); + add(internal::CapabilityJointMatrixPackedInt4ComponentTypeINTEL, + "JointMatrixPackedInt4ComponentTypeINTEL"); add(internal::CapabilityCooperativeMatrixPrefetchINTEL, "CooperativeMatrixPrefetchINTEL"); + add(internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL, + "CooperativeMatrixInvocationInstructionsINTEL"); add(internal::CapabilityCooperativeMatrixCheckedInstructionsINTEL, "CooperativeMatrixCheckedInstructionsINTEL"); add(internal::CapabilityBindlessImagesINTEL, "BindlessImagesINTEL"); diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h index 7af20dd43c..cfa480faf0 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -230,6 +230,7 @@ inline bool isTypeOpCode(Op OpCode) { isSubgroupAvcINTELTypeOpCode(OpCode) || OC == OpTypeVmeImageINTEL || isVCOpCode(OpCode) || OC == internal::OpTypeTokenINTEL || OC == internal::OpTypeJointMatrixINTEL || + OC == internal::OpTypeJointMatrixINTELv2 || OC == OpTypeCooperativeMatrixKHR; } diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 9fb2825b1b..a84ee56507 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ 
b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -6,6 +6,7 @@ _SPIRV_OP_INTERNAL(ArithmeticFenceINTEL, internal::OpArithmeticFenceINTEL) _SPIRV_OP_INTERNAL(ConvertFToBF16INTEL, internal::OpConvertFToBF16INTEL) _SPIRV_OP_INTERNAL(ConvertBF16ToFINTEL, internal::OpConvertBF16ToFINTEL) _SPIRV_OP_INTERNAL(TypeJointMatrixINTEL, internal::OpTypeJointMatrixINTEL) +_SPIRV_OP_INTERNAL(TypeJointMatrixINTEL, internal::OpTypeJointMatrixINTELv2) _SPIRV_OP_INTERNAL(JointMatrixLoadINTEL, internal::OpJointMatrixLoadINTEL) _SPIRV_OP_INTERNAL(JointMatrixStoreINTEL, internal::OpJointMatrixStoreINTEL) _SPIRV_OP_INTERNAL(JointMatrixMadINTEL, internal::OpJointMatrixMadINTEL) @@ -24,6 +25,8 @@ _SPIRV_OP_INTERNAL(CooperativeMatrixConstructCheckedINTEL, internal::OpCooperativeMatrixConstructCheckedINTEL) _SPIRV_OP_INTERNAL(CooperativeMatrixPrefetchINTEL, internal::OpCooperativeMatrixPrefetchINTEL) +_SPIRV_OP_INTERNAL(CooperativeMatrixApplyFunctionINTEL, + internal::OpCooperativeMatrixApplyFunctionINTEL) _SPIRV_OP_INTERNAL(ComplexFMulINTEL, internal::ComplexFMulINTEL) _SPIRV_OP_INTERNAL(ComplexFDivINTEL, internal::ComplexFDivINTEL) _SPIRV_OP_INTERNAL(MaskedGatherINTEL, internal::OpMaskedGatherINTEL) diff --git a/lib/SPIRV/libSPIRV/SPIRVType.cpp b/lib/SPIRV/libSPIRV/SPIRVType.cpp index 9f7aac0b2c..51779146c9 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -206,7 +206,8 @@ bool SPIRVType::isTypeStruct() const { return OpCode == OpTypeStruct; } bool SPIRVType::isTypeVector() const { return OpCode == OpTypeVector; } bool SPIRVType::isTypeJointMatrixINTEL() const { - return OpCode == internal::OpTypeJointMatrixINTEL; + return OpCode == internal::OpTypeJointMatrixINTEL || + OpCode == internal::OpTypeJointMatrixINTELv2; } bool SPIRVType::isTypeCooperativeMatrixKHR() const { @@ -290,13 +291,20 @@ void SPIRVTypeForwardPointer::decode(std::istream &I) { } SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL( - SPIRVModule *M, SPIRVId TheId, SPIRVType *CompType, +
SPIRVModule *M, SPIRVId TheId, Op OC, SPIRVType *CompType, std::vector Args) : SPIRVType(M, FixedWC + Args.size(), OC, TheId), CompType(CompType), - Args(Args) {} + Args(std::move(Args)) {} + +SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL( + SPIRVModule *M, SPIRVId TheId, SPIRVType *CompType, + std::vector Args) + : SPIRVType(M, FixedWC + Args.size(), internal::OpTypeJointMatrixINTEL, + TheId), + CompType(CompType), Args(std::move(Args)) {} SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL() - : SPIRVType(OC), CompType(nullptr), + : SPIRVType(internal::OpTypeJointMatrixINTEL), CompType(nullptr), Args({nullptr, nullptr, nullptr, nullptr}) {} void SPIRVTypeJointMatrixINTEL::encode(spv_ostream &O) const { @@ -329,4 +337,22 @@ void SPIRVTypeCooperativeMatrixKHR::decode(std::istream &I) { Decoder >> Id >> CompType >> Args; } +void SPIRVTypeCooperativeMatrixKHR::validate() const { + SPIRVEntry::validate(); + SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + SPIRVConstant *UseConst = static_cast(this->getUse()); + auto InstName = OpCodeNameMap::map(OC); + uint64_t UseValue = UseConst->getZExtIntValue(); + SPVErrLog.checkError( + (UseValue <= CooperativeMatrixUseMatrixAccumulatorKHR), + SPIRVEC_InvalidInstruction, + InstName + "\nIncorrect Use parameter, should be MatrixA, MatrixB or " + "Accumulator\n"); + SPIRVConstant *ScopeConst = static_cast(this->getScope()); + uint64_t ScopeValue = ScopeConst->getZExtIntValue(); + SPVErrLog.checkError((ScopeValue <= ScopeInvocation), + SPIRVEC_InvalidInstruction, + InstName + "\nUnsupported Scope parameter\n"); +} + } // namespace SPIRV diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index 9c9325d88b..1e4789d799 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -1062,13 +1062,18 @@ class SPIRVTypeTokenINTEL : public SPIRVType { }; class SPIRVTypeJointMatrixINTEL : public SPIRVType { + Op OC; SPIRVType *CompType; std::vector Args; public: - const 
static Op OC = internal::OpTypeJointMatrixINTEL; const static SPIRVWord FixedWC = 3; - // Complete constructor + // Complete constructor with non-default OC + SPIRVTypeJointMatrixINTEL(SPIRVModule *M, SPIRVId TheId, Op OC, + SPIRVType *CompType, + std::vector Args); + + // Complete constructor for default OC SPIRVTypeJointMatrixINTEL(SPIRVModule *M, SPIRVId TheId, SPIRVType *CompType, std::vector Args); // Incomplete constructor @@ -1087,15 +1092,43 @@ class SPIRVTypeJointMatrixINTEL : public SPIRVType { SPIRVType *getCompType() const { return CompType; } SPIRVValue *getRows() const { return Args[0]; } SPIRVValue *getColumns() const { return Args[1]; } - SPIRVValue *getLayout() const { return Args[2]; } - SPIRVValue *getScope() const { return Args[3]; } - SPIRVValue *getUse() const { return Args.size() > 4 ? Args[4] : nullptr; } + + SPIRVValue *getLayout() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args[2]; + return nullptr; + } + + SPIRVValue *getScope() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args[3]; + return Args[2]; + } + + SPIRVValue *getUse() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args.size() > 4 ? Args[4] : nullptr; + return Args[3]; + } + + SPIRVValue *getComponentTypeInterpretation() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args.size() > 5 ? Args[5] : nullptr; + return Args.size() > 4 ?
Args[4] : nullptr; + } + + std::vector getNonLiteralOperands() const override { + return std::vector(1, CompType); + } }; class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { SPIRVType *CompType; std::vector Args; +protected: + void validate() const override; + public: const static Op OC = OpTypeCooperativeMatrixKHR; const static SPIRVWord FixedWC = 7; @@ -1118,6 +1151,10 @@ class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { SPIRVValue *getRows() const { return Args[1]; } SPIRVValue *getColumns() const { return Args[2]; } SPIRVValue *getUse() const { return Args[3]; } + + std::vector getNonLiteralOperands() const override { + return std::vector(1, CompType); + } }; } // namespace SPIRV diff --git a/lib/SPIRV/libSPIRV/spirv_internal.hpp b/lib/SPIRV/libSPIRV/spirv_internal.hpp index b1f91405fd..07d75a2db1 100644 --- a/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -68,6 +68,7 @@ enum InternalOp { IOpJointMatrixSUMadINTEL = 6128, IOpJointMatrixUSMadINTEL = 6129, IOpJointMatrixUUMadINTEL = 6130, + IOpTypeJointMatrixINTELv2 = 6184, IOpArithmeticFenceINTEL = 6145, IOpCooperativeMatrixLoadCheckedINTEL = 6193, IOpCooperativeMatrixStoreCheckedINTEL = 6194, @@ -79,6 +80,7 @@ enum InternalOp { IOpMaskedGatherINTEL = 6428, IOpMaskedScatterINTEL = 6429, IOpJointMatrixGetElementCoordINTEL = 6440, + IOpCooperativeMatrixApplyFunctionINTEL = 6448, IOpCooperativeMatrixPrefetchINTEL = 6449, IOpConvertHandleToImageINTEL = 6529, IOpConvertHandleToSamplerINTEL = 6530, @@ -113,6 +115,11 @@ enum InternalCapability { ICapabilityTensorFloat32RoundingINTEL = 6425, ICapabilityMaskedGatherScatterINTEL = 6427, ICapabilityJointMatrixWIInstructionsINTEL = 6435, + ICapabilityCooperativeMatrixInvocationInstructionsINTEL = 6435, + ICapabilityJointMatrixTF32ComponentTypeINTEL = 6436, + ICapabilityJointMatrixBF16ComponentTypeINTEL = 6437, + ICapabilityJointMatrixPackedInt2ComponentTypeINTEL = 6438, + ICapabilityJointMatrixPackedInt4ComponentTypeINTEL = 
6439, ICapabilityCacheControlsINTEL = 6441, ICapRegisterLimitsINTEL = 6460, ICapabilityBindlessImagesINTEL = 6528 @@ -139,6 +146,14 @@ enum InternalJointMatrixLayout { enum InternalJointMatrixUse { MatrixA = 0, MatrixB = 1, Accumulator = 2 }; +enum InternalJointMatrixCTI { + None = 0, + TF32 = 1, + Bfloat16 = 2, + PackedInt2 = 3, + PackedInt4 = 4 +}; + enum InternalBuiltIn { IBuiltInSubDeviceIDINTEL = 6135, IBuiltInGlobalHWThreadIDINTEL = 6136, @@ -162,7 +177,12 @@ enum class StoreCacheControlINTEL { #define _SPIRV_OP(x, y) constexpr x x##y = static_cast(I##x##y); _SPIRV_OP(Capability, JointMatrixINTEL) _SPIRV_OP(Capability, JointMatrixWIInstructionsINTEL) +_SPIRV_OP(Capability, JointMatrixTF32ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixBF16ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixPackedInt2ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixPackedInt4ComponentTypeINTEL) _SPIRV_OP(Op, TypeJointMatrixINTEL) +_SPIRV_OP(Op, TypeJointMatrixINTELv2) _SPIRV_OP(Op, JointMatrixLoadINTEL) _SPIRV_OP(Op, JointMatrixStoreINTEL) _SPIRV_OP(Op, JointMatrixMadINTEL) @@ -179,6 +199,9 @@ _SPIRV_OP(Op, CooperativeMatrixConstructCheckedINTEL) _SPIRV_OP(Capability, CooperativeMatrixPrefetchINTEL) _SPIRV_OP(Op, CooperativeMatrixPrefetchINTEL) +_SPIRV_OP(Capability, CooperativeMatrixInvocationInstructionsINTEL) +_SPIRV_OP(Op, CooperativeMatrixApplyFunctionINTEL) + _SPIRV_OP(Capability, HWThreadQueryINTEL) _SPIRV_OP(BuiltIn, SubDeviceIDINTEL) _SPIRV_OP(BuiltIn, GlobalHWThreadIDINTEL) diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll new file mode 100644 index 0000000000..0571af5dd9 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll @@ -0,0 +1,436 @@ +;; Compiled from joint_matrix_bf16_fill_k_cache.cpp from https://github.com/intel/llvm +;; command: clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 
llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp -fsycl-device-only -o test.bc + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability JointMatrixINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV: TypeInt [[#Int16Ty:]] 16 0 +; CHECK-SPIRV: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy1:]] [[#FloatTy]] +; CHECK-SPIRV: TypeStruct [[#StructTy1:]] [[#MatTy1]] +; CHECK-SPIRV: TypeArray [[#ArrayTy1:]] [[#StructTy1]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy1]] [[#]] +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy2:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy2:]] [[#MatTy2]] +; CHECK-SPIRV: TypeArray [[#ArrayTy2:]] [[#StructTy2]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy2]] [[#]] +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy3:]] [[#MatTy3]] +; CHECK-SPIRV: TypeArray [[#ArrayTy3:]] [[#StructTy3]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy3]] [[#]] + +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) } +; CHECK-LLVM: alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]] +; CHECK-LLVM: alloca [4 x [2 x 
%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]] + +; ModuleID = 'test.bc' +source_filename = "llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::__generated_multi_ptr" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.0" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.1" = type { ptr addrspace(1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) } +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + +$_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_(ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr") align 8 %_arg_pA, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.0") align 8 %_arg_pB, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.1") align 8 %_arg_pC) local_unnamed_addr #0 comdat !srcloc !59 
!kernel_arg_buffer_location !60 !intel_reqd_sub_group_size !61 !sycl_fixed_targets !62 !sycl_kernel_omit_args !63 { +entry: + call void @__itt_offload_wi_start_wrapper() + %tC.i = alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], align 8 + %tA.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], align 8 + %tB.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], align 8 + %0 = load i64, ptr %_arg_pA, align 8, !tbaa !64 + %1 = inttoptr i64 %0 to ptr addrspace(1) + %2 = load i64, ptr %_arg_pB, align 8, !tbaa !64 + %3 = inttoptr i64 %2 to ptr addrspace(1) + %4 = load i64, ptr %_arg_pC, align 8, !tbaa !64 + %5 = inttoptr i64 %4 to ptr addrspace(1) + %6 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, i64 8), align 8, !noalias !68 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32, !noalias !68 + %8 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !75 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !75 + %cmp.i.i = icmp ult i64 %6, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i208.i = icmp ult i64 %7, 2147483648 + tail call void @llvm.assume(i1 %cmp.i208.i) + %cmp.i209.i = icmp ult i64 %8, 2147483648 + tail call void @llvm.assume(i1 %cmp.i209.i) + %cmp.i212.i = icmp ult i64 %9, 2147483648 + tail call void @llvm.assume(i1 %cmp.i212.i) + %div205.i = lshr i64 %9, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tC.i) #4 + br label %arrayctor.loop.i + +arrayctor.loop.i: ; preds = %arrayctor.loop.i, %entry + %arrayctor.cur.idx.i = phi i64 [ 0, %entry ], [ %arrayctor.cur.add.i, %arrayctor.loop.i ] + %arrayctor.cur.add.i = add nuw nsw i64 %arrayctor.cur.idx.i, 1 + %arrayctor.done.i = icmp eq i64 %arrayctor.cur.add.i, 16 + br i1 
%arrayctor.done.i, label %for.cond.i, label %arrayctor.loop.i + +for.cond.i: ; preds = %arrayctor.loop.i, %for.cond.cleanup7.i + %m.0.i = phi i32 [ %inc12.i, %for.cond.cleanup7.i ], [ 0, %arrayctor.loop.i ] + %cmp.i = icmp ult i32 %m.0.i, 4 + br i1 %cmp.i, label %for.cond5.preheader.i, label %for.cond14.preheader.i + +for.cond5.preheader.i: ; preds = %for.cond.i + %idxprom.i = zext i32 %m.0.i to i64 + br label %for.cond5.i + +for.cond14.preheader.i: ; preds = %for.cond.i + %mul50.i = shl nuw nsw i64 %6, 8 + %mul51.i = shl nuw nsw i64 %8, 5 + %add52.i = add nuw nsw i64 %mul50.i, %mul51.i + %mul80.i = shl nuw nsw i64 %div205.i, 7 + %10 = shl nuw nsw i64 %7, 9 + %11 = add nuw nsw i64 %10, %mul80.i + br label %for.cond14.i + +for.cond5.i: ; preds = %for.body8.i, %for.cond5.preheader.i + %n.0.i = phi i32 [ %inc.i, %for.body8.i ], [ 0, %for.cond5.preheader.i ] + %cmp6.i = icmp ult i32 %n.0.i, 4 + br i1 %cmp6.i, label %for.body8.i, label %for.cond.cleanup7.i + +for.cond.cleanup7.i: ; preds = %for.cond5.i + %inc12.i = add nuw nsw i32 %m.0.i, 1 + br label %for.cond.i, !llvm.loop !80 + +for.body8.i: ; preds = %for.cond5.i + %conv.i = zext i32 %n.0.i to i64 + %arrayidx10.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom.i, i64 %conv.i + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 + store target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 + %inc.i = add nuw nsw i32 %n.0.i, 1 + br label %for.cond5.i, !llvm.loop !84 + +for.cond14.i: ; preds = %for.cond.cleanup34.i, %for.cond14.preheader.i + %k2.0.i = phi i32 [ %inc129.i, %for.cond.cleanup34.i ], [ 0, %for.cond14.preheader.i ] + %cmp15.i = 
icmp ult i32 %k2.0.i, 8 + br i1 %cmp15.i, label %for.body17.i, label %for.cond132.preheader.i + +for.cond132.preheader.i: ; preds = %for.cond14.i + %mul156.i = shl nuw nsw i64 %7, 8 + %mul157.i = shl nuw nsw i64 %div205.i, 6 + %add158.i = add nuw nsw i64 %mul156.i, %mul157.i + br label %for.cond132.i + +for.body17.i: ; preds = %for.cond14.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tA.i) #4 + br label %arrayctor.loop20.i + +arrayctor.loop20.i: ; preds = %arrayctor.loop20.i, %for.body17.i + %arrayctor.cur21.idx.i = phi i64 [ 0, %for.body17.i ], [ %arrayctor.cur21.add.i, %arrayctor.loop20.i ] + %arrayctor.cur21.add.i = add nuw nsw i64 %arrayctor.cur21.idx.i, 1 + %arrayctor.done23.i = icmp eq i64 %arrayctor.cur21.add.i, 8 + br i1 %arrayctor.done23.i, label %arrayctor.cont24.i, label %arrayctor.loop20.i + +arrayctor.cont24.i: ; preds = %arrayctor.loop20.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tB.i) #4 + br label %arrayctor.loop27.i + +arrayctor.loop27.i: ; preds = %arrayctor.loop27.i, %arrayctor.cont24.i + %arrayctor.cur28.idx.i = phi i64 [ 0, %arrayctor.cont24.i ], [ %arrayctor.cur28.add.i, %arrayctor.loop27.i ] + %arrayctor.cur28.add.i = add nuw nsw i64 %arrayctor.cur28.idx.i, 1 + %arrayctor.done30.i = icmp eq i64 %arrayctor.cur28.add.i, 8 + br i1 %arrayctor.done30.i, label %for.cond32.preheader.i, label %arrayctor.loop27.i + +for.cond32.preheader.i: ; preds = %arrayctor.loop27.i + %12 = shl nuw i32 %k2.0.i, 1 + br label %for.cond32.i + +for.cond32.i: ; preds = %for.cond.cleanup92.i, %for.cond32.preheader.i + %k1.0.i = phi i32 [ %inc126.i, %for.cond.cleanup92.i ], [ 0, %for.cond32.preheader.i ] + %cmp33.i = icmp ult i32 %k1.0.i, 2 + br i1 %cmp33.i, label %for.body35.i, label %for.cond.cleanup34.i + +for.cond.cleanup34.i: ; preds = %for.cond32.i + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tB.i) #4 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tA.i) #4 + %inc129.i = add nuw nsw i32 %k2.0.i, 1 + br label 
%for.cond14.i, !llvm.loop !85 + +for.body35.i: ; preds = %for.cond32.i + %13 = add nuw i32 %12, %k1.0.i + %div37206.i = and i32 %13, 268435455 + %idxprom46.i = zext i32 %k1.0.i to i64 + %mul57.i = shl nuw nsw i32 %div37206.i, 4 + %conv58.i = zext i32 %mul57.i to i64 + %invariant.gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %conv58.i + br label %for.cond39.i + +for.cond39.i: ; preds = %for.body42.i, %for.body35.i + %m38.0.i = phi i32 [ 0, %for.body35.i ], [ %inc60.i, %for.body42.i ] + %cmp40.i = icmp ult i32 %m38.0.i, 4 + br i1 %cmp40.i, label %for.body42.i, label %for.cond63.preheader.i + +for.cond63.preheader.i: ; preds = %for.cond39.i + %mul77.i = shl nuw nsw i32 %div37206.i, 12 + %conv78.i = zext i32 %mul77.i to i64 + %add.ptr.i225.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %3, i64 %conv78.i + br label %for.cond63.i + +for.body42.i: ; preds = %for.cond39.i + %idxprom44.i = zext i32 %m38.0.i to i64 + %arrayidx47.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom44.i, i64 %idxprom46.i + %mul53.i = shl nuw nsw i32 %m38.0.i, 3 + %conv54.i = zext i32 %mul53.i to i64 + %add55.i = add nuw nsw i64 %add52.i, %conv54.i + %mul56.i = shl nuw nsw i64 %add55.i, 8 + %gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul56.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) %call1.i.i, ptr %arrayidx47.i, align 8, 
!tbaa !86 + %inc60.i = add nuw nsw i32 %m38.0.i, 1 + br label %for.cond39.i, !llvm.loop !88 + +for.cond63.i: ; preds = %for.body67.i, %for.cond63.preheader.i + %n62.0.i = phi i32 [ %inc87.i, %for.body67.i ], [ 0, %for.cond63.preheader.i ] + %cmp65.i = icmp ult i32 %n62.0.i, 4 + br i1 %cmp65.i, label %for.body67.i, label %for.cond90.i + +for.body67.i: ; preds = %for.cond63.i + %conv64.i = zext i32 %n62.0.i to i64 + %arrayidx72.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv64.i, i64 %idxprom46.i + %14 = shl nuw nsw i64 %conv64.i, 5 + %mul85.i = add nuw nsw i64 %14, %11 + %add.ptr.i226.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i225.i, i64 %mul85.i + %call1.i219.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 + %inc87.i = add nuw nsw i32 %n62.0.i, 1 + br label %for.cond63.i, !llvm.loop !91 + +for.cond90.i: ; preds = %for.cond63.i, %for.cond.cleanup98.i + %m89.0.i = phi i32 [ %inc123.i, %for.cond.cleanup98.i ], [ 0, %for.cond63.i ] + %cmp91.i = icmp ult i32 %m89.0.i, 4 + br i1 %cmp91.i, label %for.cond95.preheader.i, label %for.cond.cleanup92.i + +for.cond95.preheader.i: ; preds = %for.cond90.i + %idxprom102.i = zext i32 %m89.0.i to i64 + %arrayidx105.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom102.i, i64 %idxprom46.i + %15 = load 
target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 + br label %for.cond95.i + +for.cond.cleanup92.i: ; preds = %for.cond90.i + %inc126.i = add nuw nsw i32 %k1.0.i, 1 + br label %for.cond32.i, !llvm.loop !95 + +for.cond95.i: ; preds = %for.body99.i, %for.cond95.preheader.i + %n94.0.i = phi i32 [ %inc120.i, %for.body99.i ], [ 0, %for.cond95.preheader.i ] + %cmp97.i = icmp ult i32 %n94.0.i, 4 + br i1 %cmp97.i, label %for.body99.i, label %for.cond.cleanup98.i + +for.cond.cleanup98.i: ; preds = %for.cond95.i + %inc123.i = add nuw nsw i32 %m89.0.i, 1 + br label %for.cond90.i, !llvm.loop !96 + +for.body99.i: ; preds = %for.cond95.i + %conv96.i = zext i32 %n94.0.i to i64 + %arrayidx109.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv96.i, i64 %idxprom46.i + %arrayidx113.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom102.i, i64 %conv96.i + %16 = load target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 + %17 = load target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 + %call.i221.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef %15, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef %16, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %17, i32 noundef 3) #5, !noalias !92 + store 
target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 + %inc120.i = add nuw nsw i32 %n94.0.i, 1 + br label %for.cond95.i, !llvm.loop !97 + +for.cond132.i: ; preds = %for.cond.cleanup140.i, %for.cond132.preheader.i + %m131.0.i = phi i32 [ %inc166.i, %for.cond.cleanup140.i ], [ 0, %for.cond132.preheader.i ] + %cmp133.i = icmp ult i32 %m131.0.i, 4 + br i1 %cmp133.i, label %for.cond137.preheader.i, label %_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit + +for.cond137.preheader.i: ; preds = %for.cond132.i + %idxprom143.i = zext i32 %m131.0.i to i64 + %mul152.i = shl nuw nsw i32 %m131.0.i, 3 + %conv153.i = zext i32 %mul152.i to i64 + %add154.i = add nuw nsw i64 %add52.i, %conv153.i + %mul155.i = shl nuw nsw i64 %add154.i, 8 + %add.ptr.i227.i = getelementptr inbounds float, ptr addrspace(1) %5, i64 %mul155.i + br label %for.cond137.i + +for.cond137.i: ; preds = %for.body141.i, %for.cond137.preheader.i + %n136.0.i = phi i32 [ %inc163.i, %for.body141.i ], [ 0, %for.cond137.preheader.i ] + %cmp139.i = icmp ult i32 %n136.0.i, 4 + br i1 %cmp139.i, label %for.body141.i, label %for.cond.cleanup140.i + +for.cond.cleanup140.i: ; preds = %for.cond137.i + %inc166.i = add nuw nsw i32 %m131.0.i, 1 + br label %for.cond132.i, !llvm.loop !98 + +for.body141.i: ; preds = %for.cond137.i + %conv138.i = zext i32 %n136.0.i to i64 + %arrayidx146.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom143.i, i64 %conv138.i + %mul160.i = shl nuw nsw i64 %conv138.i, 4 + %add161.i = add nuw nsw i64 %add158.i, %mul160.i + %add.ptr.i228.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i227.i, i64 %add161.i + %18 = load target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2), ptr %arrayidx146.i, align 8, !tbaa !82 + tail call 
spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + %inc163.i = add nuw nsw i32 %n136.0.i, 1 + br label %for.cond137.i, !llvm.loop !99 + +_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond132.i + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tC.i) #4 + call void @__itt_offload_wi_finish_wrapper() + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, 
i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
+ +declare void @__itt_offload_wi_start_wrapper() + +declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #3 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57} +!llvm.ident = !{!58} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = 
!{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ 
TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll new file mode 100644 index 0000000000..237c05688b --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll @@ -0,0 +1,79 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix,+SPV_INTEL_bfloat16_conversion -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-OCL-IR + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --spirv-target-env=SPV-IR +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR + +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_bfloat16_conversion 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR-NEXT: ConvertFToBF16INTEL +; CHECK-ERROR-NEXT: Can be used with cooperative matrices only when SPV_INTEL_joint_matrix is enabled + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; 
CHECK-SPIRV-DAG: Capability Bfloat16ConversionINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixBF16ComponentTypeINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_bfloat16_conversion" +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeInt [[#ShortTy:]] 16 0 +; CHECK-SPIRV-DAG: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#FP32MatTy:]] [[#FP32Ty]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#ShortMatTy:]] [[#ShortTy]] +; CHECK-SPIRV: CompositeConstruct [[#FP32MatTy]] [[#FP32Mat:]] +; CHECK-SPIRV: ConvertFToBF16INTEL [[#ShortMatTy]] [[#]] [[#FP32Mat]] +; CHECK-SPIRV: CompositeConstruct [[#ShortMatTy]] [[#ShortMat:]] +; CHECK-SPIRV: ConvertBF16ToFINTEL [[#FP32MatTy]] [[#]] [[#ShortMat]] + +; CHECK-OCL-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z32intel_convert_bfloat16_as_ushortPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#FP32Matrix]]) +; CHECK-OCL-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z31intel_convert_as_bfloat16_floatPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#ShortMatrix]]) + + +; CHECK-SPV-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) 
@_Z27__spirv_ConvertFToBF16INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#FP32Matrix]]) +; CHECK-SPV-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z27__spirv_ConvertBF16ToFINTELPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#ShortMatrix]]) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_bf() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) + ret void +} + +define void @convert_bf_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 
2) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 17.0.0"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll new file mode 100644 index 0000000000..b0f97b74d7 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll @@ -0,0 +1,149 @@ +;; compiled from joint_matrix_apply_bf16.cpp from intel/llvm with some modifications + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Capability CooperativeMatrixInvocationInstructionsINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy:]] +; CHECK-SPIRV: CompositeConstruct [[#MatTy]] [[#Mat:]] +; CHECK-SPIRV: PtrCastToGeneric [[#]] [[#Ptr:]] [[#]] +; CHECK-SPIRV: CooperativeMatrixApplyFunctionINTEL [[#MatTy]] [[#Apply:]] [[#Ptr]] [[#Mat]] +; CHECK-SPIRV: CooperativeMatrixStoreKHR [[#]] [[#Apply]] + +; CHECK-LLVM: %[[Mat:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @"_Z26__spirv_CompositeConstructP38class.sycl::_V1::ext::oneapi::bfloat16" +; CHECK-LLVM: %[[Apply:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) 
@"_Z43__spirv_CooperativeMatrixApplyFunctionINTELPU3AS477class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapperPU3AS144__spirv_CooperativeMatrixKHR__short_3_8_16_0"(ptr addrspace(4) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %[[Mat]]) +; CHECK-LLVM: call spir_func void @"_Z33__spirv_CooperativeMatrixStoreKHRPU3AS138class.sycl::_V1::ext::oneapi::bfloat16PU3AS144__spirv_CooperativeMatrixKHR__short_3_8_16_0il"(ptr addrspace(1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %[[Apply]], i32 0, i64 0) + +; ModuleID = 'matrix_apply.bc' +source_filename = "../llvm/sycl/test-e2e/Matrix/joint_matrix_apply_bf16.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [2 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapper" = type { ptr addrspace(4) } +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } +%class.anon.0 = type <{ %"class.sycl::_V1::accessor", %class.anon, [7 x i8] }> +%"class.sycl::_V1::accessor" = type { %"class.sycl::_V1::detail::AccessorImplDevice", %union.anon } +%"class.sycl::_V1::detail::AccessorImplDevice" = type { %"class.sycl::_V1::id", %"class.sycl::_V1::range", %"class.sycl::_V1::range" } +%union.anon = type { ptr addrspace(1) } +%class.anon = type { i8 } + +$_ZTSZZ17matrix_verify_addIN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32EZ4mainEUlRS4_E_EvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfOT2_ENKUlRNS1_7handlerEE_clESI_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local 
local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ17matrix_verify_addIN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32EZ4mainEUlRS4_E_EvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfOT2_ENKUlRNS1_7handlerEE_clESI_EUlNS1_7nd_itemILi2EEEE_(ptr addrspace(1) noundef align 2 %_arg_accA, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accA1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accA2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accA3) local_unnamed_addr { +entry: + %ref.tmp.i20 = alloca %"class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapper", align 8 + %agg.tmp.i17 = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 + %ref.tmp6.i = alloca float, align 4 + %__SYCLKernel = alloca %class.anon.0, align 8 + %__SYCLKernel.ascast = addrspacecast ptr %__SYCLKernel to ptr addrspace(4) + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %__SYCLKernel) + %agg.tmp.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA1, align 8 + %agg.tmp.sroa.0.sroa.2.0._arg_accA1.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA1, i64 8 + %agg.tmp.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp.sroa.0.sroa.2.0._arg_accA1.ascast.sroa_idx, align 8 + %agg.tmp5.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA2, align 8 + %agg.tmp5.sroa.0.sroa.2.0._arg_accA2.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA2, i64 8 + %agg.tmp5.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp5.sroa.0.sroa.2.0._arg_accA2.ascast.sroa_idx, align 8 + %agg.tmp6.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA3, align 8 + %agg.tmp6.sroa.0.sroa.2.0._arg_accA3.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA3, i64 8 + %agg.tmp6.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp6.sroa.0.sroa.2.0._arg_accA3.ascast.sroa_idx, align 8 + %0 = getelementptr inbounds %"class.sycl::_V1::accessor", ptr 
%__SYCLKernel, i64 0, i32 1 + store i64 %agg.tmp6.sroa.0.sroa.0.0.copyload, ptr %__SYCLKernel, align 8 + %AccessRange.i.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 1 + store i64 %agg.tmp.sroa.0.sroa.0.0.copyload, ptr %AccessRange.i.i.i.i.i, align 8 + %MemRange.i.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 2 + store i64 %agg.tmp5.sroa.0.sroa.0.0.copyload, ptr %MemRange.i.i.i.i.i, align 8 + %arrayidx.i21.i.i.i.i = getelementptr inbounds [2 x i64], ptr %__SYCLKernel, i64 0, i64 1 + store i64 %agg.tmp6.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i21.i.i.i.i, align 8 + %arrayidx.i25.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 1, i32 0, i32 0, i64 1 + store i64 %agg.tmp.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i25.i.i.i.i, align 8 + %arrayidx.i29.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 2, i32 0, i32 0, i64 1 + store i64 %agg.tmp5.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i29.i.i.i.i, align 8 + %mul.i6.i.i.i.i = mul i64 %agg.tmp6.sroa.0.sroa.0.0.copyload, %agg.tmp5.sroa.0.sroa.2.0.copyload + %1 = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accA, i64 %mul.i6.i.i.i.i + %add.ptr.i = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %agg.tmp6.sroa.0.sroa.2.0.copyload + store ptr addrspace(1) %add.ptr.i, ptr %0, align 8 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %4 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8 + %5 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %ref.tmp6.ascast.i 
= addrspacecast ptr %ref.tmp6.i to ptr addrspace(4) + %cmp.i11 = icmp ult i64 %2, 2147483648 + %cmp.i = icmp ult i64 %3, 2147483648 + %cmp.i15 = icmp ult i64 %4, 2147483648 + %sub.i = sub nsw i64 %2, %4 + %cmp.i12 = icmp ult i64 %5, 2147483648 + %sub5.i = sub nsw i64 %3, %5 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %ref.tmp6.i) + store float 5.000000e+00, ptr %ref.tmp6.i, align 4 + %call.i.i = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) %ref.tmp6.ascast.i) + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %agg.tmp.i17) + store i16 %call.i.i, ptr %agg.tmp.i17, align 2 + %call.i18 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z26__spirv_CompositeConstruct(ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2 %agg.tmp.i17) + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %agg.tmp.i17) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ref.tmp6.i) + %lambda.i = getelementptr inbounds %class.anon.0, ptr addrspace(4) %__SYCLKernel.ascast, i64 0, i32 1 + %ref.tmp.ascast.i21 = addrspacecast ptr %ref.tmp.i20 to ptr addrspace(4) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp.i20) + store ptr addrspace(4) %lambda.i, ptr %ref.tmp.i20, align 8 + %call.i22 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %call.i18) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp.i20) + %6 = load ptr addrspace(1), ptr %0, align 8 + %7 = load i64, ptr %__SYCLKernel, align 8 + %8 = load i64, ptr %arrayidx.i29.i.i.i.i, align 8 + %mul.i6.i.i.i.i.i = mul i64 %7, %8 + %9 = load i64, ptr %arrayidx.i21.i.i.i.i, align 8 + %add.i7.i.i.i.i.i = add i64 %mul.i6.i.i.i.i.i, %9 + %idx.neg.i.i = sub i64 0, 
%add.i7.i.i.i.i.i + %add.ptr.i.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %6, i64 %idx.neg.i.i + %mul12.i = shl nsw i64 %sub.i, 8 + %add.ptr.i43 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i.i, i64 %mul12.i + %div14.i = and i64 %sub5.i, -16 + %add.ptr.i44 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i43, i64 %div14.i + call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef %add.ptr.i44, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %call.i22, i32 noundef 0, i64 noundef 0) + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %__SYCLKernel) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z26__spirv_CompositeConstruct(ptr noundef byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4)) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8), target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func void 
@_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef, i32 noundef, i64 noundef) local_unnamed_addr + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git)"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll index a6f24bc596..03dfbdfeb8 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll @@ -20,9 +20,9 @@ ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const3]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const3]] [[#Const12]] [[#Const48]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const1]] ; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixLoadKHR [[#MatTy2]] [[#Load1:]] ; CHECK-SPIRV: CooperativeMatrixLengthKHR [[#Int32Ty]] [[#]] [[#MatTy2]] 
@@ -31,14 +31,14 @@ ; CHECK-SPIRV: CooperativeMatrixMulAddKHR [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixStoreKHR - -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 1, i32 1, i32 0, i64 %_arg_K) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS4clii(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i64 %_arg_K, i32 0, i32 1) -; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS4cl -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3i(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) -; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], 
i32 12, i32 48, i32 0, i32 0, i64 %_arg_K) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS4cili(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i64 %_arg_K, i32 1) +; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 12, i32 48, i32 0, i32 0, i64 %mul22.i) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cil +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2i(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) +; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) ; ModuleID = 'test-matrix-opaque.bc' source_filename = "matrix-int8-test.cpp" @@ -57,8 +57,8 @@ $_ZTSZZ15matrix_multiply = comdat any ; Function Attrs: convergent norecurse define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiply(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr 
addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 + %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accB6, align 8 @@ -80,8 +80,8 @@ entry: %cmp.i58.i = icmp ult i64 %5, 2147483648 %sub5.i = sub nsw i64 %2, %5 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i) - %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 %mul.i = mul nsw i64 %sub.i, 12 %div2452.i = lshr i64 %sub5.i, 4 %mul26.i = mul i64 %div2452.i, 48 @@ -105,20 +105,20 @@ for.body.i: ; preds = %for.cond.i %conv13.i = zext i32 %mul12.i to i64 %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i32 noundef 0, i32 noundef 
1, i32 noundef 1, i32 noundef 0, i64 noundef %_arg_K) #4 - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 - %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %call1.i.i) + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %_arg_K) + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i64 noundef %_arg_K, i32 noundef 1) #4 + %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %call1.i.i) %div20.i = mul nsw i32 %k.0.i, 12 %conv21.i = zext i32 %div20.i to i64 %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 1, i32 noundef 0, i64 noundef %mul22.i) #4 - %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %mul22.i) + %call1.i73.i = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %ref.tmp29.sroa.0.i, align 8 store i64 
%ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %sub_c.sroa.0.i, align 8 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) @@ -131,31 +131,31 @@ _ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6 %mul39.i = mul nuw i64 %div2452.i, 12 %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i) ret void } ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 -declare dso_local spir_func noundef i32 
@_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef) +declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef) -; Function Attrs: convergent -declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef) local_unnamed_addr #2 
+declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll index 72010ed93e..71ea0a8afe 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll @@ -1,207 +1,158 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV: Capability JointMatrixINTEL -; CHECK-SPIRV: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV: Name 
[[#Kernel:]] "_ZTSZ4mainE11matrix_test" - -; CHECK-SPIRV-DAG: TypeInt [[#ShortTy:]] 16 0 -; CHECK-SPIRV-DAG: TypeInt [[#CharTy:]] 8 0 -; CHECK-SPIRV-DAG: TypeInt [[#IntTy:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Zero:]] 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Two:]] 2 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Three:]] 3 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Sixteen:]] 16 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#FortyTwo:]] 42 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#CTy:]] [[#ShortTy]] [[#Two]] [[#Two]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#ATy:]] [[#CharTy]] [[#Two]] [[#Sixteen]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#BTy:]] [[#CharTy]] [[#Sixteen]] [[#Two]] [[#Three]] [[#Three]] - -; CHECK-SPIRV: Function [[#]] [[#Kernel]] -; CHECK-SPIRV: FunctionParameter -; CHECK-SPIRV: FunctionParameter [[#]] [[#Stride:]] - -; CHECK-SPIRV: Label [[#Entry:]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#CTy]] [[#CLoaded:]] [[#Cptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] - -; CHECK-SPIRV: Phi [[#CTy]] [[#C:]] [[#CLoaded]] [[#Entry]] [[#CMad:]] [[#ForBody:]] - -; CHECK-SPIRV: Label [[#ForBody]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#ATy]] [[#A:]] [[#Aptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#BTy]] [[#B:]] [[#Bptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: JointMatrixMadINTEL [[#CTy]] [[#CMad]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixSUMadINTEL [[#CTy]] [[#UnusedMad1:]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixUSMadINTEL [[#CTy]] [[#UnusedMad2:]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixUUMadINTEL [[#CTy]] [[#UnusedMad3:]] [[#A]] [[#B]] [[#C]] [[#Three]] - -; CHECK-SPIRV: JointMatrixStoreINTEL [[#Cptr:]] [[#C]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: CompositeConstruct [[#CTy]] [[#Cnew:]] [[#FortyTwo]] -; CHECK-SPIRV: Store [[#PtrToZero:]] [[#Zero]] -; CHECK-SPIRV: 
Load [[#]] [[#ZeroLoad:]] [[#PtrToZero]] -; CHECK-SPIRV: CompositeConstruct [[#CTy]] [[#CnewLoad:]] [[#ZeroLoad]] - - -; CHECK-LLVM: %spirv.JointMatrixINTEL._short_2_2_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._char_2_16_0_3_0 -; CHECK-LLVM: %spirv.JointMatrixINTEL._char_16_2_3_3 - -; CHECK-LLVM: [[CLoaded:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z77__spirv_JointMatrixLoadINTEL_RPU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3PU3AS4sliii(i16 addrspace(4)* [[CPtr:%.*]], i64 [[Stride:%.*]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[C:%.*]] = phi %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [ [[CLoaded]], %entry ], [ [[CMad:%.*]], %for.body.i ] -; CHECK-LLVM: [[A:%.*]] = call spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* @_Z79__spirv_JointMatrixLoadINTEL_RPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS4cliii(i8 addrspace(4)* [[APtr:%.*]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[B:%.*]] = call spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* @_Z77__spirv_JointMatrixLoadINTEL_RPU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS4cliii(i8 addrspace(4)* [[BPtr:%.*]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[CMad1:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z27__spirv_JointMatrixMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad2:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixSUMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 
addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad3:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixUSMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad4:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixUUMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) - -; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS4sPU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3liii(i16 addrspace(4)* [[CPtr]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z26__spirv_CompositeConstructi(i32 42) -; CHECK-LLVM: store i32 0, i32 addrspace(4)* [[StoredZero:%.*]], align 4 -; CHECK-LLVM: [[LoadedZero:%.*]] = load i32, i32 addrspace(4)* [[StoredZero]], align 8 -; CHECK-LLVM: call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z26__spirv_CompositeConstructi(i32 [[LoadedZero]]) - -; ModuleID = 'joint_matrix_test-sycl-spir64-unknown-unknown.bc' -source_filename = "./joint_matrix_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability JointMatrixINTEL +; 
CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 +; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const12:]] 12 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const0:]] 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const48:]] 48 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Int32Ty]] [[#Const12]] [[#Const12]] [[#Const3]] [[#Const3]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#Int8Ty]] [[#Const12]] [[#Const48]] [[#Const0]] [[#Const3]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int8Ty]] [[#Const48]] [[#Const12]] [[#Const2]] [[#Const3]] [[#Const1]] +; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] +; CHECK-SPIRV: JointMatrixLoadINTEL [[#MatTy2]] +; CHECK-SPIRV: JointMatrixLoadINTEL [[#MatTy3]] +; CHECK-SPIRV: JointMatrixMadINTEL [[#MatTy1]] +; CHECK-SPIRV: JointMatrixStoreINTEL + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__char_12_48_0_3_0PU3AS4cliii +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__char_48_12_2_3_1PU3AS4cliii +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS142__spirv_JointMatrixINTEL__char_12_48_0_3_0PU3AS142__spirv_JointMatrixINTEL__char_48_12_2_3_1PU3AS142__spirv_JointMatrixINTEL__uint_12_12_3_3_2i(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) %{{.*}}, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) %{{.*}}, 
target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) +; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS4iPU3AS142__spirv_JointMatrixINTEL__uint_12_12_3_3_2liii(ptr addrspace(4) %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) + +; ModuleID = 'test-matrix-opaque.bc' +source_filename = "matrix-int8-test.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%spirv.JointMatrixINTEL._short_2_2_0_3 = type { [2 x [2 x [1 x [4 x [4 x i16]]]]]* } -%spirv.JointMatrixINTEL._char_2_16_0_3_0 = type { [2 x [16 x [1 x [4 x [1 x i8]]]]]* } -%spirv.JointMatrixINTEL._char_16_2_3_3 = type { [16 x [2 x [4 x [4 x i8]]]]* } +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [2 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } -$_ZTSZ4mainE11matrix_test = comdat any +$_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11matrix_test(i16 addrspace(1)* %_arg_, i64 %_arg_1, i8 addrspace(1)* %_arg_3, i8 addrspace(1)* %_arg_5) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 !intel_reqd_sub_group_size !6 { +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 
1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !7 - %1 = extractelement <3 x i64> %0, i64 1 - %2 = extractelement <3 x i64> %0, i64 0 - %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !14 - %4 = extractelement <3 x i64> %3, i64 1 - %5 = extractelement <3 x i64> %3, i64 0 - %cmp.i.i = icmp ult i64 %1, 2147483648 - tail call void @llvm.assume(i1 %cmp.i.i) - %cmp.i45.i = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i45.i) - %cmp.i43.i = icmp ult i64 %4, 2147483648 - tail call void @llvm.assume(i1 %cmp.i43.i) - %sub.i = sub nsw i64 %1, %4 - %cmp.i41.i = icmp ult i64 %5, 2147483648 - tail call void @llvm.assume(i1 %cmp.i41.i) - %sub5.i = sub nsw i64 %2, %5 - %mul6.i = shl nsw i64 %sub.i, 6 - %add.ptr.i51 = getelementptr inbounds i16, i16 addrspace(1)* %_arg_, i64 %mul6.i - %add.ptr7.i52 = getelementptr inbounds i16, i16 addrspace(1)* %add.ptr.i51, i64 %sub5.i - %add.ptr7.i = addrspacecast i16 addrspace(1)* %add.ptr7.i52 to i16 addrspace(4)* - %call8.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i16 addrspace(4)* %add.ptr7.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %add.ptr11.i53 = getelementptr inbounds i8, i8 addrspace(1)* %_arg_3, i64 %mul6.i - %add.ptr16.i55 = getelementptr inbounds i8, i8 addrspace(1)* %_arg_5, i64 %sub5.i + %sub_c.sroa.0.i = alloca 
target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 + %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 + %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 + %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 + %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %0, align 8 + %agg.tmp16.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 1 + %agg.tmp16.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp16.sroa.0.sroa.2.0..sroa_idx, align 8 + %mul.i4.i.i.i.i45 = mul i64 %agg.tmp16.sroa.0.sroa.0.0.copyload, %agg.tmp15.sroa.0.sroa.2.0.copyload + %add.i6.i.i.i.i46 = add i64 %mul.i4.i.i.i.i45, %agg.tmp16.sroa.0.sroa.2.0.copyload + %add.ptr.i47 = getelementptr inbounds i8, ptr addrspace(1) %_arg_accB, i64 %add.i6.i.i.i.i46 + %1 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %2 = extractelement <3 x i64> %1, i64 1 + %3 = extractelement <3 x i64> %1, i64 0 + %4 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %5 = extractelement <3 x i64> %4, i64 1 + %6 = extractelement <3 x i64> %4, i64 0 + %cmp.i.i = icmp ult i64 %2, 2147483648 + %cmp.i54.i = icmp ult i64 %3, 2147483648 + %cmp.i56.i = icmp ult i64 %5, 2147483648 + %sub.i = sub nsw i64 %2, %5 + %cmp.i58.i = icmp ult i64 %6, 2147483648 + %sub5.i = sub nsw i64 %3, %6 + %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) 
@_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef 0) #4 + store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %mul.i = mul nsw i64 %sub.i, 12 + %div2452.i = lshr i64 %sub5.i, 4 + %mul26.i = mul i64 %div2452.i, 48 + %div.i = udiv i64 %_arg_K, 48 + %mul11.i = mul i64 %mul.i, %_arg_K + %add.ptr.i93.i = getelementptr inbounds i8, ptr addrspace(1) %_arg_accA, i64 %mul11.i + %idx.neg.i.i104.i = sub i64 0, %add.i6.i.i.i.i46 + %add.ptr.i.i105141.i = getelementptr i8, ptr addrspace(1) %add.ptr.i47, i64 %mul26.i + %mul22.i = shl i64 %_arg_N, 2 + %add.ptr.i108140.i = getelementptr i8, ptr addrspace(1) %add.ptr.i.i105141.i, i64 %idx.neg.i.i104.i + %ref.tmp29.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %ref.tmp29.sroa.0.i to ptr + %7 = bitcast ptr %ref.tmp29.sroa.0.i to ptr + %8 = bitcast ptr %sub_c.sroa.0.i to ptr br label %for.cond.i for.cond.i: ; preds = %for.body.i, %entry %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %C.0.i = phi %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* [ %call8.i, %entry ], [ %call19.i, %for.body.i ] - %cmp.i = icmp ult i32 %k.0.i, 32 - br i1 %cmp.i, label %for.body.i, label %_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit + %conv.i = zext i32 %k.0.i to i64 + %cmp.i = icmp ugt i64 %div.i, %conv.i + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit for.body.i: ; preds = %for.cond.i - %idx.ext.i = zext i32 %k.0.i to i64 - %add.ptr12.i54 = getelementptr inbounds i8, i8 addrspace(1)* %add.ptr11.i53, i64 %idx.ext.i - %add.ptr12.i = addrspacecast i8 addrspace(1)* %add.ptr12.i54 to i8 addrspace(4)* - %call13.i = tail call spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 
addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm2ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)* %add.ptr12.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %mul14.i = shl nuw nsw i32 %k.0.i, 5 - %idx.ext15.i = zext i32 %mul14.i to i64 - %add.ptr17.i56 = getelementptr inbounds i8, i8 addrspace(1)* %add.ptr16.i55, i64 %idx.ext15.i - %add.ptr17.i = addrspacecast i8 addrspace(1)* %add.ptr17.i56 to i8 addrspace(4)* - %call18.i = tail call spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm16ELm2ELN5__spv12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)* %add.ptr17.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %call19.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call20.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixSUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call21.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* 
@_Z29__spirv_JointMatrixUSMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call22.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %add.i = add nuw nsw i32 %k.0.i, 16 - br label %for.cond.i, !llvm.loop !19 - -_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(i16 addrspace(4)* %add.ptr7.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %C.0.i.new = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32 42) #1 - %ref.tmp = alloca i32, align 4 - %ref.tmp.ascast = addrspacecast i32* %ref.tmp to i32 addrspace(4)* - store i32 0, i32 addrspace(4)* %ref.tmp.ascast, align 4 - %zero = load i32, i32 addrspace(4)* %ref.tmp.ascast, align 8 - %C.0.i.new.load = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32 %zero) #1 + %mul12.i = mul nsw i32 %k.0.i, 48 + %conv13.i = zext i32 %mul12.i to i64 + %add.ptr.i96.i 
= getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i + %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 + %div20.i = mul nsw i32 %k.0.i, 12 + %conv21.i = zext i32 %div20.i to i64 + %mul23.i = mul i64 %mul22.i, %conv21.i + %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i + %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) + %call1.i73.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef %call1.i.i, 
target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef %call1.i73.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 3) #4 + store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 + store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i +_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond.i + %mul37.i = mul i64 %mul.i, %_arg_N + %add.ptr.i.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_accC, i64 %mul37.i + %mul39.i = mul nuw i64 %div2452.i, 12 + %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i + %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 + call void 
@llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) ret void } ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i16 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm2ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm16ELm2ELN5__spv12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func 
%spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixSUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUSMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* 
@_Z29__spirv_JointMatrixUUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(i16 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32) #1 - -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 
noundef) #2 - -attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="./joint_matrix_test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #3 = { convergent } - -!llvm.module.flags = !{!0, !1} -!opencl.spir.version = !{!2} -!spirv.Source = !{!3} -!llvm.ident = !{!4} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"frame-pointer", i32 2} -!2 = !{i32 1, i32 2} -!3 = !{i32 4, i32 100000} -!4 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git b3243d9f711a1cd80681530d6017324796668d51)"} -!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!6 = !{i32 16} -!7 = !{!8, !10, !12} -!8 = distinct !{!8, !9, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!9 = distinct !{!9, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!10 = distinct !{!10, !11, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!11 = distinct !{!11, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!14 = !{!15, !17, !12} -!15 = distinct !{!15, !16, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!16 = distinct !{!16, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!17 = distinct !{!17, !18, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!18 = distinct !{!18, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!19 = 
distinct !{!19, !20, !21} -!20 = !{!"llvm.loop.mustprogress"} -!21 = !{!"llvm.loop.unroll.disable"} +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 +attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-int8-test.cpp" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #4 = { convergent } diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll index 8109cb0ed1..80c014b689 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll @@ -1,218 +1,202 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc +; compiled from joint_matrix_bfloat16.cpp test from intel/llvm -; RUN: llvm-spirv -s %t.bc -opaque-pointers=0 -o %t.regularized.bc -; RUN: llvm-dis -opaque-pointers=0 %t.regularized.bc -o %t.regularized.ll -; RUN: FileCheck < %t.regularized.ll %s --check-prefix=CHECK-REGULARIZED +; RUN: llvm-as < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o %t.spt ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; 
RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-REGULARIZED: %[[Alloca:.*]] = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 -; CHECK-REGULARIZED: %[[ASCast:.*]] = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %[[Alloca]] to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* -; CHECK-REGULARIZED: %[[GEP1:.*]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %[[ASCast]], i64 0, i32 0 -; CHECK-REGULARIZED: %[[#Extract:]] = call spir_func i16 @_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* align 2 %{{.*}}, i64 noundef %{{.*}}) -; CHECK-REGULARIZED: %[[#GEP2:]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %[[ASCast]], i32 0, i32 0 -; CHECK-REGULARIZED: store i16 %[[#Extract]], i16 addrspace(4)* %[[#GEP2]], align 2 -; CHECK-REGULARIZED: %[[#Load:]] = load i16, i16 addrspace(4)* %[[GEP1]], align 2 -; CHECK-REGULARIZED: %[[ConvertVal:.*]] = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %[[#Load]]) -; CHECK-REGULARIZED: %{{.*}} = fadd float %[[ConvertVal]], %{{.*}} - -; CHECK-SPIRV: TypeInt [[#TypeI16ID:]] 16 0 -; CHECK-SPIRV: TypeFloat [[#TypeFID:]] 32 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#TypeJointMID:]] [[#TypeI16ID]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: Phi [[#TypeJointMID]] [[#PhiID:]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: VectorExtractDynamic [[#TypeI16ID]] [[#ExtractID:]] [[#PhiID]] [[#]] -; CHECK-SPIRV: Store [[#PtrID:]] [[#ExtractID]] [[#]] [[#]] -; CHECK-SPIRV: 
ConvertBF16ToFINTEL [[#TypeFID]] [[#Conv1ID:]] [[#]] -; CHECK-SPIRV: ConvertBF16ToFINTEL [[#TypeFID]] [[#Conv2ID:]] [[#]] -; CHECK-SPIRV: FAdd [[#TypeFID]] [[#ResId:]] [[#Conv1ID]] [[#Conv2ID]] -; CHECK-SPIRV: ConvertFToBF16INTEL [[#TypeI16ID]] [[#]] [[#ResId]] -; CHECK-SPIRV: Load [[#TypeI16ID]] [[#LoadID:]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: VectorInsertDynamic [[#TypeJointMID]] [[#]] [[#PhiID]] [[#LoadID]] [[#]] - -; CHECK-LLVM: %spirv.JointMatrixINTEL._short_8_16_0_3 -; CHECK-LLVM: %[[GEP1:.*]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %{{.*}}, i64 0, i32 0 -; CHECK-LLVM: %[[GEP2:.*]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %{{.*}}, i64 0, i32 0 -; CHECK-LLVM: %[[ConvertConst:.*]] = call spir_func i16 @_Z32intel_convert_bfloat16_as_ushortf(float 2.000000e+00) -; CHECK-LLVM: %[[#LoadGEP:]] = load i16, i16 addrspace(4)* %[[GEP2]], align 2 -; CHECK-LLVM: %[[ConvertVal:.*]] = call spir_func float @_Z31intel_convert_as_bfloat16_floats(i16 %[[#LoadGEP]]) -; CHECK-LLVM: %[[ConvertConstToF:.*]] = call spir_func float @_Z31intel_convert_as_bfloat16_floats(i16 %[[ConvertConst]]) -; CHECK-LLVM: %[[FAddRes:.*]] = fadd float %[[ConvertVal]], %[[ConvertConstToF]] -; CHECK-LLVM: %[[ConvertResToBF:.*]] = call spir_func i16 @_Z32intel_convert_bfloat16_as_ushortf(float %[[FAddRes]]) -; CHECK-LLVM: store i16 %[[ConvertResToBF]], i16 addrspace(4)* %[[#]], align 2 - -; ModuleID = 'joint_matrix_bfloat16_test.bc' -source_filename = "joint_matrix_bfloat16_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: TypeInt [[#SHORT:]] 16 +; CHECK-SPIRV-DAG: TypeInt [[#INT:]] 32 +; CHECK-SPIRV-DAG: TypeFloat [[#Float:]] 32 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST8:]] 8 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST16:]] 16 +; 
CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST3:]] 3 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST1:]] 1 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST0:]] 0 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Float]] [[#CONST8]] [[#CONST16]] [[#CONST3]] [[#CONST3]] [[#CONST2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#SHORT]] [[#CONST8]] [[#CONST16]] [[#CONST0]] [[#CONST3]] [[#CONST0]] [[#CONST1]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#SHORT]] [[#CONST16]] [[#CONST16]] [[#CONST2]] [[#CONST3]] [[#CONST1]] [[#CONST1]] + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2PU3AS1fliii(ptr addrspace(1) %{{.*}}, i64 32, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @"_Z82__spirv_JointMatrixLoadINTEL_RPU3AS144__spirv_JointMatrixINTEL__short_8_16_0_3_0_1PU3AS138class.sycl::_V1::ext::oneapi::bfloat16liii"(ptr addrspace(1) %{{.*}}, i64 32, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @"_Z83__spirv_JointMatrixLoadINTEL_RPU3AS145__spirv_JointMatrixINTEL__short_16_16_2_3_1_1PU3AS138class.sycl::_V1::ext::oneapi::bfloat16liii"(ptr addrspace(1) %{{.*}}, i64 64, i32 2, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS144__spirv_JointMatrixINTEL__short_8_16_0_3_0_1PU3AS145__spirv_JointMatrixINTEL__short_16_16_2_3_1_1PU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2i(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) %{{.*}}, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i32 3) +; CHECK-LLVM: call spir_func void 
@_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2liii(ptr addrspace(1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i64 32, i32 0, i32 3, i32 0) + +; ModuleID = 'joint_matrix_bfloat16-sycl-spir64-unknown-unknown.bc' +source_filename = "../joint_matrix_bfloat16.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%class.anon = type { %"class.cl::sycl::accessor" } -%"class.cl::sycl::accessor" = type { %"class.cl::sycl::detail::AccessorImplDevice", %union.anon } -%"class.cl::sycl::detail::AccessorImplDevice" = type { %"class.cl::sycl::id", %"class.cl::sycl::range", %"class.cl::sycl::range" } -%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } -%"class.cl::sycl::detail::array" = type { [2 x i64] } -%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } -%union.anon = type { %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* } -%"class.cl::sycl::ext::intel::experimental::bfloat16" = type { i16 } -%"class.cl::sycl::nd_item" = type { %"class.cl::sycl::item", %"class.cl::sycl::item.0", %"class.cl::sycl::group" } -%"class.cl::sycl::item" = type { %"struct.cl::sycl::detail::ItemBase" } -%"struct.cl::sycl::detail::ItemBase" = type { %"class.cl::sycl::range", %"class.cl::sycl::id", %"class.cl::sycl::id" } -%"class.cl::sycl::item.0" = type { %"struct.cl::sycl::detail::ItemBase.1" } -%"struct.cl::sycl::detail::ItemBase.1" = type { %"class.cl::sycl::range", %"class.cl::sycl::id" } -%"class.cl::sycl::group" = type { %"class.cl::sycl::range", %"class.cl::sycl::range", %"class.cl::sycl::range", %"class.cl::sycl::id" } -%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 = type opaque - 
-$_ZZZ17matrix_verify_addIN2cl4sycl3ext5intel12experimental8bfloat16ELm16ELm16EEvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_ = comdat any - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 - -; Function Attrs: convergent inlinehint norecurse -define linkonce_odr dso_local spir_func void @_ZZZ17matrix_verify_addIN2cl4sycl3ext5intel12experimental8bfloat16ELm16ELm16EEvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_(%class.anon addrspace(4)* noundef align 8 dereferenceable_or_null(56) %this, %"class.cl::sycl::nd_item"* noundef byval(%"class.cl::sycl::nd_item") align 8 %spmd_item) local_unnamed_addr #1 comdat align 2 { +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + +$_ZTSZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_E7imatrix = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_E7imatrix(ptr addrspace(1) noundef align 4 %_arg_accC, ptr addrspace(1) noundef align 2 %_arg_accA, ptr addrspace(1) noundef align 2 %_arg_accB) local_unnamed_addr #0 comdat !srcloc !48 !kernel_arg_buffer_location !49 !kernel_arg_runtime_aligned !50 !kernel_arg_exclusive_ptr !50 !intel_reqd_sub_group_size !51 !sycl_fixed_targets !52 !sycl_kernel_omit_args !53 { entry: - 
%ref.tmp.i = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %agg.tmp.i54 = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %agg.tmp.i = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %spmd_item.ascast = addrspacecast %"class.cl::sycl::nd_item"* %spmd_item to %"class.cl::sycl::nd_item" addrspace(4)* - %arrayidx.i.i.i = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 0 - %0 = load i64, i64 addrspace(4)* %arrayidx.i.i.i, align 8, !tbaa !5 - %cmp.i = icmp ult i64 %0, 2147483648 - tail call void @llvm.assume(i1 %cmp.i) - %arrayidx.i.i.i29 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 1 - %1 = load i64, i64 addrspace(4)* %arrayidx.i.i.i29, align 8, !tbaa !5 - %cmp.i30 = icmp ult i64 %1, 2147483648 - tail call void @llvm.assume(i1 %cmp.i30) - %arrayidx.i.i.i31 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 1, i32 0, i32 1, i32 0, i32 0, i64 0 - %2 = load i64, i64 addrspace(4)* %arrayidx.i.i.i31, align 8, !tbaa !5 - %cmp.i32 = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i32) - %arrayidx.i.i.i33 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 1, i32 0, i32 1, i32 0, i32 0, i64 1 - %3 = load i64, i64 addrspace(4)* %arrayidx.i.i.i33, align 8, !tbaa !5 - %cmp.i34 = icmp ult i64 %3, 2147483648 - tail call void @llvm.assume(i1 %cmp.i34) - %4 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i to i8* - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %4) - %agg.tmp.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i to 
%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - %call.i.i.i = tail call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef 5.000000e+00) #6 - %value.i.i = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %agg.tmp.ascast.i, i64 0, i32 0 - store i16 %call.i.i.i, i16 addrspace(4)* %value.i.i, align 2, !tbaa !9 - %call.i = tail call spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESB_(%"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef nonnull byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %agg.tmp.i) #7 - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %4) - %ref.tmp.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %ref.tmp.i to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - %5 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %ref.tmp.i to i8* - %value.i.i.i = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %ref.tmp.ascast.i, i64 0, i32 0 - %6 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i54 to i8* - %7 = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i54, i64 0, i32 0 - %8 = addrspacecast i16* %7 to i16 addrspace(4)* - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %sub_a.sroa.0.0 = phi %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* [ %call.i, %entry ], [ %call.i58, %for.body ] - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %conv = zext i32 %i.0 to i64 - 
%call.i41 = call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEmPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0) #7 - %cmp = icmp ugt i64 %call.i41, %conv - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond - %sub5 = sub nsw i64 %1, %3 - %sub = sub nsw i64 %0, %2 - %MData.i.i.i = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %this, i64 0, i32 0, i32 1, i32 0 - %9 = load %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)*, %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* addrspace(4)* %MData.i.i.i, align 8, !tbaa !12, !noalias !13 - %mul19 = shl nsw i64 %sub, 7 - %add.ptr.i = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %9, i64 %mul19 - %div = and i64 %sub5, -8 - %add.ptr.i45 = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %add.ptr.i, i64 %div - %call.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %add.ptr.i45 to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - call spir_func void @_Z29__spirv_JointMatrixStoreINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEvPT_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEmS7_S9_i(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* noundef %call.ascast.i, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0, i64 noundef 16, i32 noundef 0, i32 noundef 3, i32 noundef 0) #7 + call void @__itt_offload_wi_start_wrapper() + %0 = load i64, ptr addrspace(1) getelementptr 
inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8, !noalias !54 + %1 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !54 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !61 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !61 + %cmp.i.i = icmp ult i64 %0, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i50.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i50.i) + %cmp.i52.i = icmp ult i64 %2, 2147483648 + tail call void @llvm.assume(i1 %cmp.i52.i) + %sub.i = sub nsw i64 %0, %2 + %cmp.i55.i = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %cmp.i55.i) + %sub5.i = sub nsw i64 %1, %3 + %mul8.i = shl nsw i64 %sub.i, 8 + %add.ptr.i.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accC, i64 %mul8.i + %div48.i = and i64 %sub5.i, -16 + %add.ptr.i69.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %div48.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef %add.ptr.i69.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul28.i = shl nsw i64 %div48.i, 1 + %add.ptr.i84.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accA, i64 %mul8.i + %invariant.gep = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accB, i64 %mul28.i + br label %for.cond.i + +for.cond.i: ; preds = %for.body.i, %entry + %sub_c.sroa.0.0.i = phi target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) [ %call1.i.i, %entry ], [ %call.i.i, %for.body.i ] + %k.0.i = phi i32 [ 0, %entry ], [ 
%add.i, %for.body.i ] + %cmp.i = icmp ult i32 %k.0.i, 2 + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit + +for.body.i: ; preds = %for.cond.i + %mul16.i = shl nuw nsw i32 %k.0.i, 4 + %conv17.i = zext i32 %mul16.i to i64 + %add.ptr.i85.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i84.i, i64 %conv17.i + %call1.i63.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i85.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %div23.i = shl nuw nsw i32 %k.0.i, 3 + %conv24.i = zext i32 %div23.i to i64 + %mul25.i = shl nuw nsw i64 %conv24.i, 6 + %gep = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul25.i + %call1.i67.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 64, i32 noundef 2, i32 noundef 3, i32 noundef 0) #3 + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) 
@_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef %call1.i63.i, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef %call1.i67.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3, !noalias !66 + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i, !llvm.loop !69 + +_ZZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit: ; preds = %for.cond.i + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i69.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + call void @__itt_offload_wi_finish_wrapper() ret void - -for.body: ; preds = %for.cond - %call.i.i = call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef 2.000000e+00) #6 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %5) #8, !noalias !16 - call spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* sret(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %ref.tmp.ascast.i, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 
addrspace(4)* noundef %sub_a.sroa.0.0, i64 noundef %conv) #7, !noalias !16 - %10 = load i16, i16 addrspace(4)* %value.i.i.i, align 2, !tbaa !19, !noalias !20 - %call.i.i.i.i = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %10) #6, !noalias !20 - %call.i.i3.i.i = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %call.i.i) #6, !noalias !20 - %add.i.i = fadd float %call.i.i.i.i, %call.i.i3.i.i - %call.i.i4.i.i = call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef %add.i.i) #6, !noalias !20 - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %5) #8, !noalias !16 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %6) - store i16 %call.i.i4.i.i, i16 addrspace(4)* %8, align 2, !tbaa !19 - %call.i58 = call spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESD_SB_m(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0, %"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef nonnull byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %agg.tmp.i54, i64 noundef %conv) #7 - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %6) - %inc = add nuw nsw i32 %i.0, 1 - br label %for.cond, !llvm.loop !23 } -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0 - -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 noundef) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* 
@_Z26__spirv_CompositeConstructIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESB_(%"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2) local_unnamed_addr #4 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEmPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef) local_unnamed_addr #4 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* sret(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2, 
%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #4 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext) local_unnamed_addr #5 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef) local_unnamed_addr #5 +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
-; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESD_SB_m(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, %"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2, i64 noundef) local_unnamed_addr #4 +declare void @__itt_offload_wi_start_wrapper() -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEvPT_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEmS7_S9_i(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* noundef, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #4 +declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { argmemonly nofree nosync nounwind willreturn } -attributes #1 = { convergent inlinehint norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { argmemonly nofree nounwind willreturn writeonly } -attributes #3 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #4 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #5 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #6 = { convergent nounwind } -attributes #7 = { convergent } -attributes #8 = { nounwind } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"sycl-module-id"="../joint_matrix_bfloat16.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent nounwind } !llvm.module.flags = !{!0, !1} !opencl.spir.version = !{!2} !spirv.Source = !{!3} -!llvm.ident = !{!4} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46} +!llvm.ident = !{!47} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, i32 2} !3 = !{i32 4, i32 100000} -!4 = !{!"clang version 15.0.0 (https://github.com/pauzinl/llvm.git fb27c655023f19ff91f09413a0c51f0a37071cff)"} -!5 = !{!6, !6, i64 0} -!6 = !{!"long", !7, i64 0} -!7 = !{!"omnipotent char", !8, i64 0} -!8 = !{!"Simple C++ TBAA"} -!9 = !{!10, !11, i64 0} -!10 = !{!"_ZTSN2cl4sycl3ext5intel12experimental8bfloat16E", !11, i64 0} -!11 = !{!"short", !7, i64 0} -!12 = !{!7, !7, i64 0} -!13 = !{!14} -!14 = distinct !{!14, !15, !"_ZNK2cl4sycl8accessorINS0_3ext5intel12experimental8bfloat16ELi2ELNS0_6access4modeE1026ELNS6_6targetE2014ELNS6_11placeholderE0ENS2_6oneapi22accessor_property_listIJEEEE11get_pointerILS8_2014EvEENS0_9multi_ptrIS5_LNS6_13address_spaceE1EEEv: %agg.result"} -!15 = distinct !{!15, !"_ZNK2cl4sycl8accessorINS0_3ext5intel12experimental8bfloat16ELi2ELNS0_6access4modeE1026ELNS6_6targetE2014ELNS6_11placeholderE0ENS2_6oneapi22accessor_property_listIJEEEE11get_pointerILS8_2014EvEENS0_9multi_ptrIS5_LNS6_13address_spaceE1EEEv"} -!16 = !{!17} -!17 = distinct !{!17, !18, !"_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS1_5intel12experimental8bfloat16ELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_: %agg.result"} 
-!18 = distinct !{!18, !"_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS1_5intel12experimental8bfloat16ELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_"} -!19 = !{!11, !11, i64 0} -!20 = !{!21, !17} -!21 = distinct !{!21, !22, !"_ZN2cl4sycl3ext5intel12experimentalplERKNS3_8bfloat16ES6_: %agg.result"} -!22 = distinct !{!22, !"_ZN2cl4sycl3ext5intel12experimentalplERKNS3_8bfloat16ES6_"} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.mustprogress"} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = 
!{!"ext_intel_legacy_image", i32 41} +!42 = !{!"int64_base_atomics", i32 7} +!43 = !{!"int64_extended_atomics", i32 8} +!44 = !{!"usm_system_allocator", i32 17} +!45 = !{!"usm_restricted_shared_allocations", i32 16} +!46 = !{!"host", i32 0} +!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!48 = !{i32 10642943} +!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} +!51 = !{i32 16} +!52 = !{} +!53 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} +!54 = !{!55, !57, !59} +!55 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!56 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!57 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!58 = distinct !{!58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!59 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!60 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!61 = !{!62, !64, !59} +!62 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!63 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!64 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!66 = !{!67} +!67 = distinct !{!67, !68, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!68 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!69 = distinct !{!69, !70} +!70 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll deleted file mode 100644 index 9ddff0fc2e..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+all -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc -; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: Capability JointMatrixINTEL -; CHECK-SPIRV-DAG: Capability JointMatrixWIInstructionsINTEL -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV-DAG: TypeInt [[#TypeInt32:]] 32 -; CHECK-SPIRV-DAG: TypeInt [[#TypeInt64:]] 64 -; CHECK-SPIRV-DAG: TypeFloat [[#TypeFloat:]] 32 -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#TypeMatrix:]] [[#TypeFloat]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV-DAG: TypeVector [[#TypeVec:]] [[#TypeInt32]] 2 -; CHECK-SPIRV: Phi [[#TypeMatrix]] [[#Matrix:]] -; CHECK-SPIRV: JointMatrixWorkItemLengthINTEL [[#TypeInt64]] [[#]] [[#Matrix]] -; CHECK-SPIRV: VectorExtractDynamic [[#TypeFloat]] [[#]] [[#Matrix]] [[#Index:]] -; CHECK-SPIRV: FMul 
[[#TypeFloat]] [[#NewVal:]] [[#]] [[#]] -; CHECK-SPIRV: VectorInsertDynamic [[#TypeMatrix]] [[#]] [[#Matrix]] [[#NewVal]] [[#Index]] -; CHECK-SPIRV: JointMatrixGetElementCoordINTEL [[#TypeVec]] [[#]] [[#Matrix]] [[#Index]] - -; CHECK-LLVM: [[Length:%.*]] = call spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3(ptr addrspace(1) [[Matrix:%.*]]) -; CHECK-LLVM: [[Elem:%.*]] = call spir_func float @_Z28__spirv_VectorExtractDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3l(ptr addrspace(1) [[Matrix]], i64 [[Index:%.*]]) -; CHECK-LLVM: [[NewVal:%.*]] = fmul float [[Elem]], 5.000000e+00 -; CHECK-LLVM: {{%.*}} = call spir_func ptr addrspace(1) @_Z27__spirv_VectorInsertDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3fl(ptr addrspace(1) [[Matrix]], float [[NewVal]], i64 [[Index]]) -; CHECK-LLVM: {{%.*}} = call spir_func <2 x i32> @_Z39__spirv_JointMatrixGetElementCoordINTELPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3l(ptr addrspace(1) [[Matrix]], i64 [[Index]]) - -source_filename = "/work/tmp/matrix-slice.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"struct.cl::sycl::detail::AssertHappened" = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 } -%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } -%"class.cl::sycl::detail::array" = type { [1 x i64] } -%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } -%spirv.JointMatrixINTEL._float_16_16_0_3 = type opaque - -$_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE = comdat any - -$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix = comdat any - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE(%"struct.cl::sycl::detail::AssertHappened" addrspace(1)* 
%_arg_, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_1, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_2, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 { -entry: - %0 = getelementptr inbounds %"class.cl::sycl::id", %"class.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds %"struct.cl::sycl::detail::AssertHappened", %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, i64 %2 - %3 = bitcast %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %add.ptr.i to i8 addrspace(1)* - %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)* - tail call spir_func void @__devicelib_assert_read(i8 addrspace(4)* %4) #2 - ret void -} - -; Function Attrs: convergent -declare extern_weak dso_local spir_func void @__devicelib_assert_read(i8 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 { -entry: - %call9.i.i = tail call spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), i64 1, i32 0, i32 3, i32 0) #2 - br label %for.cond.i - -for.cond.i: ; preds = %for.body.i, %entry - %A.sroa.0.0.i = phi %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* [ %call9.i.i, %entry ], [ %call5.i.i, %for.body.i ] - %i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] - %conv.i = zext i32 %i.0.i to i64 - %call.i12.i = tail call spir_func i64 
@_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i) #2 - %cmp.i = icmp ugt i64 %call.i12.i, %conv.i - br i1 %cmp.i, label %for.body.i, label %_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit - -for.body.i: ; preds = %for.cond.i - %call.i.i = tail call spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 %conv.i) #2 - %mul.i.i = fmul float %call.i.i, 5.000000e+00 - %call5.i.i = tail call spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, float %mul.i.i, i64 %conv.i) #2 - %call6 = tail call spir_func <2 x i32> @_Z39__spirv_JointMatrixGetElementCoordINTELIaLm8ELm32ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEDv2_jPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEm(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 %conv.i) #2 - %inc.i = add nuw nsw i32 %i.0.i, 1 - br label %for.cond.i, !llvm.loop !7 - -_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 1, i32 0, i32 3, i32 0) #2 - ret void -} - -; 
Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, float, i64) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)*, %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func <2 x i32> @_Z39__spirv_JointMatrixGetElementCoordINTELIaLm8ELm32ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEDv2_jPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEm(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64) #2 - -attributes #0 = { convergent 
norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/tmp/matrix-slice.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { convergent } - -!llvm.module.flags = !{!0, !1} -!opencl.spir.version = !{!2} -!spirv.Source = !{!3} -!llvm.ident = !{!4} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"frame-pointer", i32 2} -!2 = !{i32 1, i32 2} -!3 = !{i32 4, i32 100000} -!4 = !{!"clang version 14.0.0 (https://github.com/intel/llvm.git 3648adf79e4fdb619fdbe41d63bc39f456b5be8c)"} -!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!6 = !{} -!7 = distinct !{!7, !8} -!8 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll deleted file mode 100644 index a0f259934f..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll +++ /dev/null @@ -1,130 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc - -; RUN: llvm-spirv -s %t.bc -opaque-pointers=0 -o %t.regularized.bc -; RUN: llvm-dis -opaque-pointers=0 %t.regularized.bc -o %t.regularized.ll -; RUN: FileCheck < %t.regularized.ll %s --check-prefix=CHECK-REGULARIZED - -; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv -to-text %t.spv -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc -; RUN: llvm-dis %t.rev.bc -o %t.rev.ll -; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM - -; CHECK-REGULARIZED: %[[#ExtractElementCall:]] = call spir_func half 
@_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2{{.*}}, i64{{.*}}) -; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(4)*{{.*}}, i32 0, i32 0 -; CHECK-REGULARIZED: store half %[[#ExtractElementCall]], half addrspace(4)* %[[#GEP]] -; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half"*{{.*}}, i32 0, i32 0 -; CHECK-REGULARIZED: %[[#Component:]] = load half, half*{{.*}}, align 2 -; CHECK-REGULARIZED: call spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*{{.*}}, half %[[#Component]], i64{{.*}}) -; CHECK-REGULARIZED: declare dso_local spir_func half @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2, i64) -; CHECK-REGULARIZED: declare dso_local spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, half, i64) - -; CHECK-SPIRV: Name [[#VIDValueId:]] "agg.tmp.ascast.ascast" -; CHECK-SPIRV: TypeFloat [[#Float16Id:]] 16 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#JointMatrixTyId:]] [[#]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: 
VectorExtractDynamic [[#Float16Id]] [[#VEDId:]] [[#]] [[#]] -; CHECK-SPIRV: Store [[#]] [[#VEDId]] -; CHECK-SPIRV: PtrAccessChain [[#]] [[#GEPId:]] [[#VIDValueId]] [[#]] [[#]] -; CHECK-SPIRV: Load [[#Float16Id]] [[#ComponentId:]] [[#GEPId]] -; CHECK-SPIRV: VectorInsertDynamic [[#JointMatrixTyId]] [[#]] [[#]] [[#ComponentId]] [[#]] - -; CHECK-LLVM: %[[#ExtractElementCall:]] = call spir_func half @_Z28__spirv_VectorExtractDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3l(ptr addrspace(1){{.*}}, i64{{.*}}) -; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", ptr addrspace(4) {{.*}}, i32 0, i32 0 -; CHECK-LLVM: store half %[[#ExtractElementCall]], ptr addrspace(4) %[[#GEP]] - -; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", ptr{{.*}}, i32 0, i32 0 -; CHECK-LLVM: %[[#Component:]] = load half, ptr %[[#GEP]] -; CHECK-LLVM: spir_func ptr addrspace(1) @_Z27__spirv_VectorInsertDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3Dhl(ptr addrspace(1){{.*}}, half %[[#Component]], i64{{.*}}) - -; ModuleID = 'element_wise_all_ops_half.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.cl::sycl::detail::half_impl::half" = type { half } -%"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" = type { %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* } -%spirv.JointMatrixINTEL._half_8_16_0_3 = type opaque -%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" = type { %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, i64 } - -$_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_ = comdat any - 
-$_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_ = comdat any - -; Function Attrs: convergent mustprogress norecurse -define linkonce_odr dso_local spir_func void @_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* noalias sret(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.result, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable(16) %lhs, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat { -entry: - %lhs.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8 - %ref.tmp1 = alloca %"class.cl::sycl::detail::half_impl::half", align 2 - %lhs.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %lhs.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* - %ref.tmp1.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half"* %ref.tmp1 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %0 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8 - %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %0, i32 0, i32 0 - %1 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15 - %spvm = getelementptr inbounds 
%"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %1, i32 0, i32 0 - %2 = load %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13 - %3 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8 - %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %3, i32 0, i32 1 - %4 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17 - call spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2 %ref.tmp1.ascast, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* %2, i64 %4) #2 - ret void -} - -; Function Attrs: convergent mustprogress norecurse -define linkonce_odr dso_local spir_func align 8 dereferenceable(16) %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* @_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_(%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable_or_null(16) %this, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8 - %agg.tmp = alloca %"class.cl::sycl::detail::half_impl::half", align 2 - 
%this.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %this.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* - %agg.tmp.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half"* %agg.tmp to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %this1 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 - %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 0 - %0 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15 - %spvm = getelementptr inbounds %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %0, i32 0, i32 0 - %1 = load %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13 - %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 1 - %2 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17 - %agg.tmp.ascast.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(4)* %agg.tmp.ascast to %"class.cl::sycl::detail::half_impl::half"* - %call = call spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* 
@_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* %1, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.tmp.ascast.ascast, i64 %2) #2 - ret %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1 -} - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2, i64) #1 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, i64) #1 - -attributes #0 = { convergent mustprogress norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { convergent } - -!opencl.spir.version = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} -!spirv.Source = !{!1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1} -!opencl.used.extensions = !{!2, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} -!opencl.used.optional.core.features = !{!4, !3, !3, !4, !3, !4, !3, !3, !3, !4, !3, !4} 
-!opencl.compiler.options = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} -!llvm.ident = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5} -!llvm.module.flags = !{!6, !7} -!sycl.specialization-constants = !{} -!sycl.specialization-constants-default-values = !{} - -!0 = !{i32 1, i32 2} -!1 = !{i32 4, i32 100000} -!2 = !{!"cl_khr_fp16"} -!3 = !{} -!4 = !{!"cl_doubles"} -!5 = !{!"Compiler"} -!6 = !{i32 1, !"wchar_size", i32 4} -!7 = !{i32 7, !"frame-pointer", i32 2} -!8 = !{!9, !9, i64 0} -!9 = !{!"any pointer", !10, i64 0} -!10 = !{!"omnipotent char", !11, i64 0} -!11 = !{!"Simple C++ TBAA"} -!12 = !{!"long", !10, i64 0} -!13 = !{!14, !9, i64 0} -!14 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix12joint_matrixINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0} -!15 = !{!16, !9, i64 0} -!16 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0, !12, i64 8} -!17 = !{!16, !12, i64 8} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll index 8ea9b3041d..6b5c380de5 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll @@ -1,166 +1,210 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; compiled from joint_matrix_half.cpp test from intel/llvm + +; RUN: llvm-as < %s -o %t.bc + +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 %t.rev.bc -o - | 
FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: TypeFloat [[#FloatTy:]] 32 -; CHECK-SPIRV-DAG: TypeFloat [[#HalfTy:]] 16 -; CHECK-SPIRV-DAG: TypeInt [[#IntTy:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Zero:]] 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Two:]] 2 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Three:]] 3 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Sixteen:]] 16 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#CTy:]] [[#FloatTy]] [[#Two]] [[#Two]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#ATy:]] [[#HalfTy]] [[#Two]] [[#Sixteen]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#BTy:]] [[#HalfTy]] [[#Sixteen]] [[#Two]] [[#Three]] [[#Three]] - -; CHECK-LLVM: %spirv.JointMatrixINTEL._float_2_2_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._half_2_16_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._half_16_2_3_3 - -; ModuleID = 'joint_matrix_test-sycl-spir64-unknown-unknown.bc' -source_filename = "joint_matrix_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: TypeInt [[#INT:]] 32 +; CHECK-SPIRV-DAG: TypeFloat [[#Half:]] 16 +; CHECK-SPIRV-DAG: TypeFloat [[#Float:]] 32 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST8:]] 8 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST16:]] 16 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST3:]] 3 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST1:]] 1 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST0:]] 0 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Float]] [[#CONST8]] [[#CONST16]] [[#CONST3]] [[#CONST3]] [[#CONST2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#Half]] [[#CONST8]] [[#CONST16]] [[#CONST0]] [[#CONST3]] [[#CONST0]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Half]] [[#CONST16]] [[#CONST16]] [[#CONST2]] [[#CONST3]] [[#CONST1]] + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) 
@_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2PU3AS1fliii(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) @"_Z79__spirv_JointMatrixLoadINTEL_RPU3AS141__spirv_JointMatrixINTEL__half_8_16_0_3_0PU3AS140class.sycl::_V1::detail::half_impl::halfliii"(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) @"_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__half_16_16_2_3_1PU3AS140class.sycl::_V1::detail::half_impl::halfliii"(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 2, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS141__spirv_JointMatrixINTEL__half_8_16_0_3_0PU3AS142__spirv_JointMatrixINTEL__half_16_16_2_3_1PU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2i(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) %{{.*}}, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i32 3) +; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2liii(ptr addrspace(1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) + +; ModuleID = 'half.bc' +source_filename = "../joint_matrix_half.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 } -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } 
-%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } -%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -%"class.cl::sycl::detail::half_impl::half" = type { half } -%spirv.JointMatrixINTEL._float_2_2_0_3 = type opaque -%spirv.JointMatrixINTEL._half_2_16_0_3 = type opaque -%spirv.JointMatrixINTEL._half_16_2_3_3 = type opaque - -$_ZTSN2cl4sycl6detail16AssertInfoCopierE = comdat any +%"class.sycl::_V1::detail::half_impl::half" = type { half } -$_ZTSZ4mainE11matrix_test = comdat any +$_ZTSZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_E7imatrix = comdat any @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail16AssertInfoCopierE(%"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 { +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_E7imatrix(ptr 
addrspace(1) noundef align 2 %_arg_accA, ptr addrspace(1) noundef align 2 %_arg_accB, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat !srcloc !49 !kernel_arg_buffer_location !50 !kernel_arg_runtime_aligned !51 !kernel_arg_exclusive_ptr !51 !intel_reqd_sub_group_size !52 !sycl_used_aspects !53 !sycl_fixed_targets !54 !sycl_kernel_omit_args !55 { entry: - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened", %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, i64 %2 - %3 = bitcast %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %add.ptr.i to i8 addrspace(1)* - %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)* - tail call spir_func void @__devicelib_assert_read(i8 addrspace(4)* %4) #3 - ret void -} - -; Function Attrs: convergent -declare extern_weak dso_local spir_func void @__devicelib_assert_read(i8 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11matrix_test(float addrspace(1)* %_arg_, i64 %_arg_1, %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_3, %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_5) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 !intel_reqd_sub_group_size !7 { -entry: - %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !8 - %1 = extractelement <3 x i64> %0, i64 1 - %2 = extractelement <3 x i64> %0, i64 0 - %3 = load <3 
x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 - %4 = extractelement <3 x i64> %3, i64 1 - %5 = extractelement <3 x i64> %3, i64 0 - %cmp.i.i = icmp ult i64 %1, 2147483648 + call void @__itt_offload_wi_start_wrapper() + %0 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8, !noalias !56 + %1 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !56 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !63 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !63 + %cmp.i.i = icmp ult i64 %0, 2147483648 tail call void @llvm.assume(i1 %cmp.i.i) - %cmp.i45.i = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i45.i) - %cmp.i43.i = icmp ult i64 %4, 2147483648 - tail call void @llvm.assume(i1 %cmp.i43.i) - %sub.i = sub nsw i64 %1, %4 - %cmp.i41.i = icmp ult i64 %5, 2147483648 - tail call void @llvm.assume(i1 %cmp.i41.i) - %sub5.i = sub nsw i64 %2, %5 - %mul6.i = shl nsw i64 %sub.i, 6 - %add.ptr.i51 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %mul6.i - %add.ptr7.i52 = getelementptr inbounds float, float addrspace(1)* %add.ptr.i51, i64 %sub5.i - %add.ptr7.i = addrspacecast float addrspace(1)* %add.ptr7.i52 to float addrspace(4)* - %call8.i = tail call spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)* %add.ptr7.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %add.ptr11.i53 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_3, i64 %mul6.i - 
%add.ptr16.i55 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_5, i64 %sub5.i + %cmp.i61.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i61.i) + %cmp.i63.i = icmp ult i64 %2, 2147483648 + tail call void @llvm.assume(i1 %cmp.i63.i) + %sub.i = sub nsw i64 %0, %2 + %cmp.i66.i = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %cmp.i66.i) + %sub5.i = sub nsw i64 %1, %3 + %mul.i = shl nsw i64 %sub.i, 3 + %mul8.i = mul i64 %mul.i, %_arg_N + %add.ptr.i.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accC, i64 %mul8.i + %div58.i = and i64 %sub5.i, -16 + %add.ptr.i80.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %div58.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef %add.ptr.i80.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul34.i = shl nsw i64 %div58.i, 1 + %div1159.i = lshr i64 %_arg_K, 4 + %mul18.i = mul i64 %mul.i, %_arg_K + %add.ptr.i95.i = getelementptr inbounds %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %_arg_accA, i64 %mul18.i + %mul30.i = shl i64 %_arg_N, 1 + %invariant.gep = getelementptr %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %_arg_accB, i64 %mul34.i br label %for.cond.i for.cond.i: ; preds = %for.body.i, %entry + %sub_c.sroa.0.0.i = phi target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) [ %call1.i.i, %entry ], [ %call.i.i, %for.body.i ] %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %C.0.i = phi %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* [ %call8.i, %entry ], [ %call19.i, %for.body.i ] - %cmp.i = icmp ult i32 %k.0.i, 32 - br i1 %cmp.i, label %for.body.i, 
label %_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit + %conv.i = zext i32 %k.0.i to i64 + %cmp.i = icmp ugt i64 %div1159.i, %conv.i + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit for.body.i: ; preds = %for.cond.i - %idx.ext46.i = zext i32 %k.0.i to i64 - %add.ptr12.i54 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr11.i53, i64 %idx.ext46.i - %add.ptr12.i = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr12.i54 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %call13.i = tail call spir_func %spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm2ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* %add.ptr12.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %mul14.i = shl nuw nsw i32 %k.0.i, 5 - %idx.ext1547.i = zext i32 %mul14.i to i64 - %add.ptr17.i56 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr16.i55, i64 %idx.ext1547.i - %add.ptr17.i = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr17.i56 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %call18.i = tail call spir_func %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm16ELm2ELN5__spv12MatrixLayoutE3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* %add.ptr17.i, i64 %_arg_1, i32 0, i32 3, i32 0) 
#3 - %call19.i = tail call spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN2cl4sycl6detail9half_impl4halfEfLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS6_3ELS6_0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS9_IT_XT1_EXT2_EXT4_EXT7_EEEPNS9_ISD_XT2_EXT3_EXT5_EXT7_EEESC_S8_(%spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %add.i = add nuw nsw i32 %k.0.i, 16 - br label %for.cond.i, !llvm.loop !20 - -_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)* %add.ptr7.i, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* %C.0.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 + %mul19.i = shl nsw i32 %k.0.i, 4 + %conv20.i = zext i32 %mul19.i to i64 + %add.ptr.i96.i = getelementptr inbounds %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %add.ptr.i95.i, i64 %conv20.i + %call1.i74.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i96.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %div27.i = shl nsw i32 %k.0.i, 3 + %conv28.i = zext i32 %div27.i to i64 + %mul31.i = mul i64 %mul30.i, %conv28.i + %gep = getelementptr %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %invariant.gep, i64 %mul31.i + %call1.i78.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) 
@_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef %mul30.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #3 + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V16detail9half_impl4halfEfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) noundef %call1.i74.i, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) noundef %call1.i78.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3, !noalias !68 + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i, !llvm.loop !71 + +_ZZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit: ; preds = %for.cond.i + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i80.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + call void @__itt_offload_wi_finish_wrapper() ret void } -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* 
@_Z28__spirv_JointMatrixLoadINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* 
@_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm2ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V16detail9half_impl4halfEfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm16ELm2ELN5__spv12MatrixLayoutE3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* 
@_Z27__spirv_JointMatrixMadINTELIN2cl4sycl6detail9half_impl4halfEfLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS6_3ELS6_0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS9_IT_XT1_EXT2_EXT4_EXT7_EEEPNS9_ISD_XT2_EXT3_EXT5_EXT7_EEESC_S8_(%spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)*, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +declare void @__itt_offload_wi_start_wrapper() -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 noundef) #2 +declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/build/joint_matrix_test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #3 = { convergent } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../joint_matrix_half.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } +attributes #3 = { convergent nounwind } !llvm.module.flags = !{!0, !1} !opencl.spir.version = !{!2} !spirv.Source = !{!3} -!opencl.used.extensions = !{!4} -!opencl.used.optional.core.features = !{!4} -!opencl.compiler.options = !{!4} -!llvm.ident = !{!5} +!sycl_types_that_use_aspects = !{!4} +!sycl_aspects = !{!5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47} +!llvm.ident = !{!48} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, i32 2} !3 = !{i32 4, i32 100000} -!4 = !{} -!5 = !{!"Clang"} -!6 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!7 = !{i32 16} -!8 = !{!9, !11, !13} -!9 = distinct !{!9, !10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!10 = distinct !{!10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!11 = distinct !{!11, !12, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!12 = distinct !{!12, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!13 = distinct !{!13, !14, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!14 = distinct !{!14, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!15 = !{!16, !18, !13} -!16 = distinct !{!16, !17, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!17 = distinct !{!17, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!18 = distinct !{!18, !19, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!19 = distinct !{!19, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!20 = distinct !{!20, !21, !22} -!21 = !{!"llvm.loop.mustprogress"} -!22 = 
!{!"llvm.loop.unroll.disable"} +!4 = !{!"class.sycl::_V1::detail::half_impl::half", i32 5} +!5 = !{!"cpu", i32 1} +!6 = !{!"gpu", i32 2} +!7 = !{!"accelerator", i32 3} +!8 = !{!"custom", i32 4} +!9 = !{!"fp16", i32 5} +!10 = !{!"fp64", i32 6} +!11 = !{!"image", i32 9} +!12 = !{!"online_compiler", i32 10} +!13 = !{!"online_linker", i32 11} +!14 = !{!"queue_profiling", i32 12} +!15 = !{!"usm_device_allocations", i32 13} +!16 = !{!"usm_host_allocations", i32 14} +!17 = !{!"usm_shared_allocations", i32 15} +!18 = !{!"usm_system_allocations", i32 17} +!19 = !{!"ext_intel_pci_address", i32 18} +!20 = !{!"ext_intel_gpu_eu_count", i32 19} +!21 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!22 = !{!"ext_intel_gpu_slices", i32 21} +!23 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!24 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!25 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!26 = !{!"ext_intel_mem_channel", i32 25} +!27 = !{!"usm_atomic_host_allocations", i32 26} +!28 = !{!"usm_atomic_shared_allocations", i32 27} +!29 = !{!"atomic64", i32 28} +!30 = !{!"ext_intel_device_info_uuid", i32 29} +!31 = !{!"ext_oneapi_srgb", i32 30} +!32 = !{!"ext_oneapi_native_assert", i32 31} +!33 = !{!"host_debuggable", i32 32} +!34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!37 = !{!"ext_intel_free_memory", i32 36} +!38 = !{!"ext_intel_device_id", i32 37} +!39 = !{!"ext_intel_memory_clock_rate", i32 38} +!40 = !{!"ext_intel_memory_bus_width", i32 39} +!41 = !{!"emulated", i32 40} +!42 = !{!"ext_intel_legacy_image", i32 41} +!43 = !{!"int64_base_atomics", i32 7} +!44 = !{!"int64_extended_atomics", i32 8} +!45 = !{!"usm_system_allocator", i32 17} +!46 = !{!"usm_restricted_shared_allocations", i32 16} +!47 = !{!"host", i32 0} +!48 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!49 = !{i32 10643216} +!50 = !{i32 
-1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!51 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} +!52 = !{i32 16} +!53 = !{i32 5} +!54 = !{} +!55 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} +!56 = !{!57, !59, !61} +!57 = distinct !{!57, !58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!58 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!59 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!60 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!61 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!62 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!63 = !{!64, !66, !61} +!64 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!66 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!67 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!68 = !{!69} +!69 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} +!70 = distinct !{!70, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} +!71 = distinct !{!71, !72} +!72 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll deleted file mode 100644 index 5b59c2d8f1..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll +++ /dev/null @@ -1,151 +0,0 @@ -; RUN: llvm-as < %s -o %t.bc -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -opaque-pointers=0 -; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: Capability JointMatrixINTEL -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 -; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const12:]] 12 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const0:]] 0 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const48:]] 48 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Int32Ty]] [[#Const12]] [[#Const12]] [[#Const3]] [[#Const3]] [[#Const2]] -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#Int8Ty]] [[#Const12]] [[#Const48]] [[#Const0]] [[#Const3]] [[#Const0]] -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int8Ty]] [[#Const48]] [[#Const12]] [[#Const2]] [[#Const3]] [[#Const1]] - -; CHECK-LLVM-DAG: 
%spirv.JointMatrixINTEL._int_12_12_3_3_2 = type opaque -; CHECK-LLVM-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3_0 = type opaque -; CHECK-LLVM-DAG: %spirv.JointMatrixINTEL._char_48_12_2_3_1 = type opaque - -; ModuleID = 'test-matrix-opaque.bc' -source_filename = "matrix-int8-test.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } -%"class.sycl::_V1::detail::array" = type { [2 x i64] } -%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } - -$_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any - -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { -entry: - %sub_c.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 - %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 - 
%agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 - %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 - %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %0, align 8 - %agg.tmp16.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 1 - %agg.tmp16.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp16.sroa.0.sroa.2.0..sroa_idx, align 8 - %mul.i4.i.i.i.i45 = mul i64 %agg.tmp16.sroa.0.sroa.0.0.copyload, %agg.tmp15.sroa.0.sroa.2.0.copyload - %add.i6.i.i.i.i46 = add i64 %mul.i4.i.i.i.i45, %agg.tmp16.sroa.0.sroa.2.0.copyload - %add.ptr.i47 = getelementptr inbounds i8, ptr addrspace(1) %_arg_accB, i64 %add.i6.i.i.i.i46 - %1 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 - %2 = extractelement <3 x i64> %1, i64 1 - %3 = extractelement <3 x i64> %1, i64 0 - %4 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 - %5 = extractelement <3 x i64> %4, i64 1 - %6 = extractelement <3 x i64> %4, i64 0 - %cmp.i.i = icmp ult i64 %2, 2147483648 - %cmp.i54.i = icmp ult i64 %3, 2147483648 - %cmp.i56.i = icmp ult i64 %5, 2147483648 - %sub.i = sub nsw i64 %2, %5 - %cmp.i58.i = icmp ult i64 %6, 2147483648 - %sub5.i = sub nsw i64 %3, %6 - %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef 0) #4 - store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 - %mul.i = mul nsw i64 %sub.i, 12 - %div2452.i = lshr i64 %sub5.i, 4 - %mul26.i = mul i64 %div2452.i, 48 
- %div.i = udiv i64 %_arg_K, 48 - %mul11.i = mul i64 %mul.i, %_arg_K - %add.ptr.i93.i = getelementptr inbounds i8, ptr addrspace(1) %_arg_accA, i64 %mul11.i - %idx.neg.i.i104.i = sub i64 0, %add.i6.i.i.i.i46 - %add.ptr.i.i105141.i = getelementptr i8, ptr addrspace(1) %add.ptr.i47, i64 %mul26.i - %mul22.i = shl i64 %_arg_N, 2 - %add.ptr.i108140.i = getelementptr i8, ptr addrspace(1) %add.ptr.i.i105141.i, i64 %idx.neg.i.i104.i - %ref.tmp29.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %ref.tmp29.sroa.0.i to ptr - %7 = bitcast ptr %ref.tmp29.sroa.0.i to ptr - %8 = bitcast ptr %sub_c.sroa.0.i to ptr - br label %for.cond.i - -for.cond.i: ; preds = %for.body.i, %entry - %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %conv.i = zext i32 %k.0.i to i64 - %cmp.i = icmp ugt i64 %div.i, %conv.i - br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit - -for.body.i: ; preds = %for.cond.i - %mul12.i = mul nsw i32 %k.0.i, 48 - %conv13.i = zext i32 %mul12.i to i64 - %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i - %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 - %div20.i = mul nsw i32 %k.0.i, 12 - %conv21.i = zext i32 %div20.i to i64 - %mul23.i = mul i64 %mul22.i, %conv21.i - %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i - %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr 
addrspace(4) - %call1.i73.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #4 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef %call1.i.i, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef %call1.i73.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 3) #4 - store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 - %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 - store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %add.i = add nuw nsw i32 %k.0.i, 1 - br label %for.cond.i - 
-_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond.i - %mul37.i = mul i64 %mul.i, %_arg_N - %add.ptr.i.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_accC, i64 %mul37.i - %mul39.i = mul nuw i64 %div2452.i, 12 - %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i - %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - ret void -} - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 
noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 - -attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-int8-test.cpp" 
"uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } -attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -attributes #4 = { convergent } diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll deleted file mode 100644 index 2a813950fd..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -; Ensure that ::sycl::_V1.*{half|bfloat16} are parsed as SYCL types. 
- -; CHECK-DAG: TypeFloat [[#HalfTy:]] 16 -; CHECK-DAG: TypeInt [[#BFloat16Ty:]] 16 - -%"class.sycl::_V1::anything::half" = type { half } -%"class.sycl::_V1::anything::bfloat16" = type { i16 } - -%"struct.__spv::__spirv_JointMatrixINTEL.half" = type { [2 x [2 x [1 x [4 x %"class.sycl::_V1::anything::half"]]]]* } -%"struct.__spv::__spirv_JointMatrixINTEL.bfloat16" = type { [2 x [2 x [1 x [4 x %"class.sycl::_V1::anything::bfloat16"]]]]* } - -define spir_func void @foo(%"struct.__spv::__spirv_JointMatrixINTEL.half" *) { - ret void -} - -define spir_func void @bar(%"struct.__spv::__spirv_JointMatrixINTEL.bfloat16" *) { - ret void -} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll new file mode 100644 index 0000000000..8d0dcbfde4 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll @@ -0,0 +1,53 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix,+SPV_INTEL_tensor_float32_conversion -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_tensor_float32_conversion 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR-NEXT: RoundFToTF32INTEL +; CHECK-ERROR-NEXT: Can be used with cooperative matrices only when SPV_INTEL_joint_matrix is enabled + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Capability TensorFloat32RoundingINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixTF32ComponentTypeINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_tensor_float32_conversion" +; CHECK-SPIRV-DAG: 
Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#FP32MatTy:]] [[#FP32Ty]] +; CHECK-SPIRV: CompositeConstruct [[#FP32MatTy]] [[#FP32Mat:]] +; CHECK-SPIRV: RoundFToTF32INTEL [[#FP32MatTy]] [[#]] [[#FP32Mat]] + +; CHECK-LLVM: %[[#Mat:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Mat]]) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_tf() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 17.0.0"} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll 
b/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll index 87e4fc17dd..40b8749e11 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll @@ -20,79 +20,79 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: SNegate [[#MatrixTypeInt]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegatePU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegatePU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1) define spir_kernel void @testSNegate(i32 %a) #0 !kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !12 !kernel_arg_type_qual !9 !kernel_arg_base_type !12 { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: 
CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: FNegate [[#MatrixTypeFloat]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegatePU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) +; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegatePU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) define spir_kernel void @testFNeg(float %a) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !9 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: IAdd [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %1 = call spir_func 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %2 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAddPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %2 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAddPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testIAdd(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) 
+ %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: ISub [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISubPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISubPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testISub(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: IMul [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMulPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMulPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testIMul(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: SDiv [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define void @testSDiv(i32 %a, i32 %b) { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: UDiv [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_UDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define void @testUDiv(i32 %a, i32 %b) { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } @@ -100,69 +100,69 @@ define void @testUDiv(i32 %a, i32 %b) { ; CHECK-SPIRV: CompositeConstruct 
[[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FAdd [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FAddPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FAddPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFAdd(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FSub [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSubPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSubPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFSub(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 
0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FMul [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FMulPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMulPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFMul(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FDiv [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDivPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDivPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFDiv(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 
3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef 
target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) attributes #0 = { nounwind } diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll new file mode 100644 index 0000000000..28979b4eb1 --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll @@ -0,0 +1,437 @@ +;; Compiled from joint_matrix_bf16_fill_k_cache.cpp from https://github.com/intel/llvm +;; command: clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp -fsycl-device-only -o test.bc +;; and then JointMatrixINTEL target ext type was replaced with CooperativeMatrixKHR + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV: TypeInt [[#Int16Ty:]] 16 0 +; CHECK-SPIRV: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#FloatTy]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy1:]] [[#MatTy1]] +; CHECK-SPIRV: TypeArray [[#ArrayTy1:]] [[#StructTy1]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy1]] 
[[#]] +; CHECK-SPIRV: TypeStruct [[#StructTy2:]] [[#MatTy2]] +; CHECK-SPIRV: TypeArray [[#ArrayTy2:]] [[#StructTy2]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy2]] [[#]] +; CHECK-SPIRV: TypeStruct [[#StructTy3:]] [[#MatTy3]] +; CHECK-SPIRV: TypeArray [[#ArrayTy3:]] [[#StructTy3]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy3]] [[#]] + +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) } +; CHECK-LLVM: alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]] + +; ModuleID = 'test.bc' +source_filename = "llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::__generated_multi_ptr" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.0" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.1" = type { ptr addrspace(1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", 
i16, 3, 16, 16, 1) } +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + +$_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_(ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr") align 8 %_arg_pA, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.0") align 8 %_arg_pB, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.1") align 8 %_arg_pC) local_unnamed_addr #0 comdat !srcloc !59 !kernel_arg_buffer_location !60 !intel_reqd_sub_group_size !61 !sycl_fixed_targets !62 !sycl_kernel_omit_args !63 { +entry: + call void @__itt_offload_wi_start_wrapper() + %tC.i = alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], align 8 + %tA.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], align 8 + %tB.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], align 8 + %0 = load i64, ptr %_arg_pA, align 8, !tbaa !64 + %1 = inttoptr i64 %0 to ptr addrspace(1) + %2 = load i64, ptr %_arg_pB, align 8, !tbaa !64 + %3 = inttoptr i64 %2 to ptr addrspace(1) + %4 = load i64, ptr %_arg_pC, align 8, !tbaa !64 + %5 = inttoptr i64 %4 to ptr addrspace(1) + %6 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, i64 8), align 8, !noalias !68 + %7 = load i64, ptr addrspace(1) 
@__spirv_BuiltInWorkgroupId, align 32, !noalias !68 + %8 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !75 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !75 + %cmp.i.i = icmp ult i64 %6, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i208.i = icmp ult i64 %7, 2147483648 + tail call void @llvm.assume(i1 %cmp.i208.i) + %cmp.i209.i = icmp ult i64 %8, 2147483648 + tail call void @llvm.assume(i1 %cmp.i209.i) + %cmp.i212.i = icmp ult i64 %9, 2147483648 + tail call void @llvm.assume(i1 %cmp.i212.i) + %div205.i = lshr i64 %9, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tC.i) #4 + br label %arrayctor.loop.i + +arrayctor.loop.i: ; preds = %arrayctor.loop.i, %entry + %arrayctor.cur.idx.i = phi i64 [ 0, %entry ], [ %arrayctor.cur.add.i, %arrayctor.loop.i ] + %arrayctor.cur.add.i = add nuw nsw i64 %arrayctor.cur.idx.i, 1 + %arrayctor.done.i = icmp eq i64 %arrayctor.cur.add.i, 16 + br i1 %arrayctor.done.i, label %for.cond.i, label %arrayctor.loop.i + +for.cond.i: ; preds = %arrayctor.loop.i, %for.cond.cleanup7.i + %m.0.i = phi i32 [ %inc12.i, %for.cond.cleanup7.i ], [ 0, %arrayctor.loop.i ] + %cmp.i = icmp ult i32 %m.0.i, 4 + br i1 %cmp.i, label %for.cond5.preheader.i, label %for.cond14.preheader.i + +for.cond5.preheader.i: ; preds = %for.cond.i + %idxprom.i = zext i32 %m.0.i to i64 + br label %for.cond5.i + +for.cond14.preheader.i: ; preds = %for.cond.i + %mul50.i = shl nuw nsw i64 %6, 8 + %mul51.i = shl nuw nsw i64 %8, 5 + %add52.i = add nuw nsw i64 %mul50.i, %mul51.i + %mul80.i = shl nuw nsw i64 %div205.i, 7 + %10 = shl nuw nsw i64 %7, 9 + %11 = add nuw nsw i64 %10, %mul80.i + br label %for.cond14.i + +for.cond5.i: ; preds = %for.body8.i, %for.cond5.preheader.i + %n.0.i = phi i32 [ %inc.i, %for.body8.i ], [ 0, %for.cond5.preheader.i ] + %cmp6.i = icmp ult i32 %n.0.i, 4 + br i1 %cmp6.i, label %for.body8.i, label 
%for.cond.cleanup7.i + +for.cond.cleanup7.i: ; preds = %for.cond5.i + %inc12.i = add nuw nsw i32 %m.0.i, 1 + br label %for.cond.i, !llvm.loop !80 + +for.body8.i: ; preds = %for.cond5.i + %conv.i = zext i32 %n.0.i to i64 + %arrayidx10.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom.i, i64 %conv.i + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 + store target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 + %inc.i = add nuw nsw i32 %n.0.i, 1 + br label %for.cond5.i, !llvm.loop !84 + +for.cond14.i: ; preds = %for.cond.cleanup34.i, %for.cond14.preheader.i + %k2.0.i = phi i32 [ %inc129.i, %for.cond.cleanup34.i ], [ 0, %for.cond14.preheader.i ] + %cmp15.i = icmp ult i32 %k2.0.i, 8 + br i1 %cmp15.i, label %for.body17.i, label %for.cond132.preheader.i + +for.cond132.preheader.i: ; preds = %for.cond14.i + %mul156.i = shl nuw nsw i64 %7, 8 + %mul157.i = shl nuw nsw i64 %div205.i, 6 + %add158.i = add nuw nsw i64 %mul156.i, %mul157.i + br label %for.cond132.i + +for.body17.i: ; preds = %for.cond14.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tA.i) #4 + br label %arrayctor.loop20.i + +arrayctor.loop20.i: ; preds = %arrayctor.loop20.i, %for.body17.i + %arrayctor.cur21.idx.i = phi i64 [ 0, %for.body17.i ], [ %arrayctor.cur21.add.i, %arrayctor.loop20.i ] + %arrayctor.cur21.add.i = add nuw nsw i64 %arrayctor.cur21.idx.i, 1 + %arrayctor.done23.i = icmp eq i64 %arrayctor.cur21.add.i, 8 + br i1 %arrayctor.done23.i, label %arrayctor.cont24.i, label %arrayctor.loop20.i + +arrayctor.cont24.i: ; preds = %arrayctor.loop20.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull 
%tB.i) #4 + br label %arrayctor.loop27.i + +arrayctor.loop27.i: ; preds = %arrayctor.loop27.i, %arrayctor.cont24.i + %arrayctor.cur28.idx.i = phi i64 [ 0, %arrayctor.cont24.i ], [ %arrayctor.cur28.add.i, %arrayctor.loop27.i ] + %arrayctor.cur28.add.i = add nuw nsw i64 %arrayctor.cur28.idx.i, 1 + %arrayctor.done30.i = icmp eq i64 %arrayctor.cur28.add.i, 8 + br i1 %arrayctor.done30.i, label %for.cond32.preheader.i, label %arrayctor.loop27.i + +for.cond32.preheader.i: ; preds = %arrayctor.loop27.i + %12 = shl nuw i32 %k2.0.i, 1 + br label %for.cond32.i + +for.cond32.i: ; preds = %for.cond.cleanup92.i, %for.cond32.preheader.i + %k1.0.i = phi i32 [ %inc126.i, %for.cond.cleanup92.i ], [ 0, %for.cond32.preheader.i ] + %cmp33.i = icmp ult i32 %k1.0.i, 2 + br i1 %cmp33.i, label %for.body35.i, label %for.cond.cleanup34.i + +for.cond.cleanup34.i: ; preds = %for.cond32.i + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tB.i) #4 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tA.i) #4 + %inc129.i = add nuw nsw i32 %k2.0.i, 1 + br label %for.cond14.i, !llvm.loop !85 + +for.body35.i: ; preds = %for.cond32.i + %13 = add nuw i32 %12, %k1.0.i + %div37206.i = and i32 %13, 268435455 + %idxprom46.i = zext i32 %k1.0.i to i64 + %mul57.i = shl nuw nsw i32 %div37206.i, 4 + %conv58.i = zext i32 %mul57.i to i64 + %invariant.gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %conv58.i + br label %for.cond39.i + +for.cond39.i: ; preds = %for.body42.i, %for.body35.i + %m38.0.i = phi i32 [ 0, %for.body35.i ], [ %inc60.i, %for.body42.i ] + %cmp40.i = icmp ult i32 %m38.0.i, 4 + br i1 %cmp40.i, label %for.body42.i, label %for.cond63.preheader.i + +for.cond63.preheader.i: ; preds = %for.cond39.i + %mul77.i = shl nuw nsw i32 %div37206.i, 12 + %conv78.i = zext i32 %mul77.i to i64 + %add.ptr.i225.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %3, i64 %conv78.i + br label %for.cond63.i + +for.body42.i: ; 
preds = %for.cond39.i + %idxprom44.i = zext i32 %m38.0.i to i64 + %arrayidx47.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom44.i, i64 %idxprom46.i + %mul53.i = shl nuw nsw i32 %m38.0.i, 3 + %conv54.i = zext i32 %mul53.i to i64 + %add55.i = add nuw nsw i64 %add52.i, %conv54.i + %mul56.i = shl nuw nsw i64 %add55.i, 8 + %gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul56.i + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %call1.i.i, ptr %arrayidx47.i, align 8, !tbaa !86 + %inc60.i = add nuw nsw i32 %m38.0.i, 1 + br label %for.cond39.i, !llvm.loop !88 + +for.cond63.i: ; preds = %for.body67.i, %for.cond63.preheader.i + %n62.0.i = phi i32 [ %inc87.i, %for.body67.i ], [ 0, %for.cond63.preheader.i ] + %cmp65.i = icmp ult i32 %n62.0.i, 4 + br i1 %cmp65.i, label %for.body67.i, label %for.cond90.i + +for.body67.i: ; preds = %for.cond63.i + %conv64.i = zext i32 %n62.0.i to i64 + %arrayidx72.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv64.i, i64 %idxprom46.i + %14 = shl nuw nsw i64 %conv64.i, 5 + %mul85.i = add nuw nsw i64 %14, %11 + %add.ptr.i226.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i225.i, i64 %mul85.i + %call1.i219.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) 
@_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 + %inc87.i = add nuw nsw i32 %n62.0.i, 1 + br label %for.cond63.i, !llvm.loop !91 + +for.cond90.i: ; preds = %for.cond63.i, %for.cond.cleanup98.i + %m89.0.i = phi i32 [ %inc123.i, %for.cond.cleanup98.i ], [ 0, %for.cond63.i ] + %cmp91.i = icmp ult i32 %m89.0.i, 4 + br i1 %cmp91.i, label %for.cond95.preheader.i, label %for.cond.cleanup92.i + +for.cond95.preheader.i: ; preds = %for.cond90.i + %idxprom102.i = zext i32 %m89.0.i to i64 + %arrayidx105.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom102.i, i64 %idxprom46.i + %15 = load target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 + br label %for.cond95.i + +for.cond.cleanup92.i: ; preds = %for.cond90.i + %inc126.i = add nuw nsw i32 %k1.0.i, 1 + br label %for.cond32.i, !llvm.loop !95 + +for.cond95.i: ; preds = %for.body99.i, %for.cond95.preheader.i + %n94.0.i = phi i32 [ %inc120.i, %for.body99.i ], [ 0, %for.cond95.preheader.i ] + %cmp97.i = icmp ult i32 %n94.0.i, 4 + br i1 %cmp97.i, label %for.body99.i, label %for.cond.cleanup98.i + +for.cond.cleanup98.i: ; preds = %for.cond95.i + %inc123.i = add nuw nsw i32 %m89.0.i, 1 + br label %for.cond90.i, !llvm.loop !96 + +for.body99.i: ; preds = %for.cond95.i + %conv96.i = zext i32 %n94.0.i to i64 + %arrayidx109.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv96.i, i64 %idxprom46.i + 
%arrayidx113.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom102.i, i64 %conv96.i + %16 = load target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 + %17 = load target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 + %call.i221.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %15, target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) noundef %16, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef %17, i32 noundef 3) #5, !noalias !92 + store target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 + %inc120.i = add nuw nsw i32 %n94.0.i, 1 + br label %for.cond95.i, !llvm.loop !97 + +for.cond132.i: ; preds = %for.cond.cleanup140.i, %for.cond132.preheader.i + %m131.0.i = phi i32 [ %inc166.i, %for.cond.cleanup140.i ], [ 0, %for.cond132.preheader.i ] + %cmp133.i = icmp ult i32 %m131.0.i, 4 + br i1 %cmp133.i, label %for.cond137.preheader.i, label %_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit + +for.cond137.preheader.i: ; preds = %for.cond132.i + %idxprom143.i = zext i32 %m131.0.i to i64 + %mul152.i = shl nuw nsw i32 %m131.0.i, 3 + %conv153.i = zext i32 %mul152.i to i64 + %add154.i = add nuw nsw i64 %add52.i, %conv153.i + %mul155.i = shl nuw nsw i64 %add154.i, 8 + 
%add.ptr.i227.i = getelementptr inbounds float, ptr addrspace(1) %5, i64 %mul155.i + br label %for.cond137.i + +for.cond137.i: ; preds = %for.body141.i, %for.cond137.preheader.i + %n136.0.i = phi i32 [ %inc163.i, %for.body141.i ], [ 0, %for.cond137.preheader.i ] + %cmp139.i = icmp ult i32 %n136.0.i, 4 + br i1 %cmp139.i, label %for.body141.i, label %for.cond.cleanup140.i + +for.cond.cleanup140.i: ; preds = %for.cond137.i + %inc166.i = add nuw nsw i32 %m131.0.i, 1 + br label %for.cond132.i, !llvm.loop !98 + +for.body141.i: ; preds = %for.cond137.i + %conv138.i = zext i32 %n136.0.i to i64 + %arrayidx146.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom143.i, i64 %conv138.i + %mul160.i = shl nuw nsw i64 %conv138.i, 4 + %add161.i = add nuw nsw i64 %add158.i, %mul160.i + %add.ptr.i228.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i227.i, i64 %add161.i + %18 = load target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2), ptr %arrayidx146.i, align 8, !tbaa !82 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + %inc163.i = add nuw nsw i32 %n136.0.i, 1 + br label %for.cond137.i, !llvm.loop !99 + +_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond132.i + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tC.i) #4 + call void @__itt_offload_wi_finish_wrapper() + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void 
@llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) 
@_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef, target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
+ +declare void @__itt_offload_wi_start_wrapper() + +declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #3 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57} +!llvm.ident = !{!58} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = 
!{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ 
TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll new file mode 100644 index 0000000000..39bc1763e9 --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll @@ -0,0 +1,149 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv +; TODO: Validation is disabled till the moment the tools in CI are updated (passes locally) +; R/UN: spirv-val %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o %t.rev.bc +; RUN: llvm-dis %t.rev.bc +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: TypeInt [[#TypeInt32:]] 32 0 +; CHECK-SPIRV: TypeInt [[#TypeInt16:]] 16 0 +; CHECK-SPIRV: TypeInt [[#TypeInt8:]] 8 0 +; CHECK-SPIRV: TypeFloat [[#TypeFloat:]] 32 +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeFloat:]] [[#TypeFloat]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt32:]] [[#TypeInt32]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt16:]] [[#TypeInt16]] +; CHECK-SPIRV: TypeFloat [[#TypeFloat16:]] 16 +; CHECK-SPIRV: 
TypeCooperativeMatrixKHR [[#MatrixTypeFloat16:]] [[#TypeFloat16]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt8:]] [[#TypeInt8]] + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertFToU [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Matrix]]) + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertFToS [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Matrix]]) + +define void 
@convert_f_to_s() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertSToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#Matrix]]) + +define void @convert_s_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertUToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) 
@_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#Matrix]]) + +define void @convert_u_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt32]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: UConvert [[#MatrixTypeInt8]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %[[#Matrix]]) + +define void @u_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt8]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: SConvert [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) 
@_Z26__spirv_CompositeConstructc(i8 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) %[[#Matrix]]) + +define void @s_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt8(i8 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: FConvert [[#MatrixTypeFloat]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructDh(half 0xH0000) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) %[[#Matrix]]) + +define void @f_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructHalf(half 0xH0000) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) + 
+declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructHalf(half noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt8(i8 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) + +declare spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 08d094a0e457360ad8b94b017d2dc277e697ca76)"} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll index 9c7e9d9f91..18897f6203 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll @@ -18,9 +18,9 @@ ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const3]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] 
[[#Const12]] [[#Const48]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const1]] ; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixLoadKHR [[#MatTy2]] ; CHECK-SPIRV: CooperativeMatrixLengthKHR [[#Int32Ty]] [[#]] [[#MatTy2]] @@ -29,12 +29,13 @@ ; CHECK-SPIRV: CooperativeMatrixStoreKHR -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS4clii -; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS4cl -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR{{.*}}(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) -; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR{{.*}}(ptr addrspace(4) %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS4clii +; CHECK-LLVM: call spir_func i32 
@_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cl +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR{{.*}}(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) +; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR{{.*}}(ptr addrspace(4) %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) + ; ModuleID = 'test-matrix-opaque.bc' source_filename = "matrix-int8-test.cpp" @@ -53,8 +54,8 @@ $_ZTSZZ15matrix_multiply = comdat any ; Function Attrs: convergent norecurse define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiply(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 + %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr 
%agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 @@ -78,8 +79,8 @@ entry: %sub5.i = sub nsw i64 %3, %6 %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 %mul.i = mul nsw i64 %sub.i, 12 %div2452.i = lshr i64 %sub5.i, 4 %mul26.i = mul i64 %div2452.i, 48 @@ -106,18 +107,18 @@ for.body.i: ; preds = %for.cond.i %conv13.i = zext i32 %mul12.i to i64 %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 - %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %call1.i.i) + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 + %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %call1.i.i) 
%div20.i = mul nsw i32 %k.0.i, 12 %conv21.i = zext i32 %div20.i to i64 %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 + %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) 
noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) @@ -130,28 +131,28 @@ _ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6 %mul39.i = mul nuw i64 %div2452.i, 12 %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull 
%sub_c.sroa.0.i.0.i.0..sroa_cast) ret void } ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 -declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef) +declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef) ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll new file mode 100644 index 0000000000..ef25389724 --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv 2>&1 | FileCheck %s + +; CHECK: InvalidInstruction: Can't translate llvm instruction: +; CHECK: TypeCooperativeMatrixKHR +; CHECK: Unsupported Scope parameter + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 12, 
12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll new file mode 100644 index 0000000000..0016f888cb --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv 2>&1 | FileCheck %s + +; CHECK: InvalidInstruction: Can't translate llvm instruction: +; CHECK: TypeCooperativeMatrixKHR +; CHECK: Incorrect Use parameter, should be MatrixA, MatrixB or Accumulator + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) diff --git a/test/transcoding/spirv-target-types-buffer.ll b/test/transcoding/spirv-target-types-buffer.ll new file mode 100644 index 0000000000..4de5ad32fb --- /dev/null +++ b/test/transcoding/spirv-target-types-buffer.ll @@ -0,0 +1,23 @@ +; Check translation of the buffer surface target extension type +; +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv -spirv-ext=+SPV_INTEL_vector_compute %t.bc -spirv-text -o %t.spv.txt +; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; CHECK-SPIRV: Capability VectorComputeINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_vector_compute" +; CHECK-SPIRV: Name [[#FuncName:]] "foo" +; CHECK-SPIRV: Name 
[[#ParamName:]] "a" +; CHECK-SPIRV: TypeVoid [[#VoidT:]] +; CHECK-SPIRV: TypeBufferSurfaceINTEL [[#BufferID:]] +; CHECK-SPIRV: Function [[#VoidT]] [[#FuncID:]] +; CHECK-SPIRV-NEXT: FunctionParameter [[#BufferID]] [[#ParamName]] + +define spir_kernel void @foo(target("spirv.BufferSurfaceINTEL", 0) %a) #0 { + entry: + ret void + } + +attributes #0 = { noinline norecurse nounwind readnone "VCFunction"} \ No newline at end of file