From dea14c59bf3c78a9535bbbfbba3da94ef1c81e0a Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Tue, 7 Mar 2023 13:57:00 +0100 Subject: [PATCH 01/12] [Backport to 16] Add ComponentTypeInterpretation for joint matrix type (#1835) It specifies how to interpret 'Component Type' when components of a joint matrix are storages for values of different types, for example float for TF32, unsigned short for bfloat16. At this point only tf32 type interpretation is added during SPIR-V generation. Adding it to bf16 is a breaking change and requires adaptation across drivers. Spec update: intel/llvm#8175 Signed-off-by: Sidorov, Dmitry dmitry.sidorov@intel.com (cherry picked from commit b7c5218b8a3db7af8d85d6f31275d802cd181e4f) --- lib/SPIRV/SPIRVReader.cpp | 23 +- lib/SPIRV/SPIRVWriter.cpp | 52 +++-- lib/SPIRV/libSPIRV/SPIRVEnum.h | 8 + lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 8 + lib/SPIRV/libSPIRV/SPIRVType.h | 3 + lib/SPIRV/libSPIRV/spirv_internal.hpp | 16 ++ .../joint_matrix_tf32.ll | 204 ++++++++++++++++++ 7 files changed, 291 insertions(+), 23 deletions(-) create mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 581f5ee192..e66ad333e0 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -478,9 +478,28 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { (unsigned)S}; if (auto *Use = MT->getUse()) Params.push_back(static_cast(Use)->getZExtIntValue()); + auto *CTI = MT->getComponentTypeInterpretation(); + if (!CTI) + return mapType(T, getSPIRVType(internal::OpTypeJointMatrixINTEL, + transTypeToOCLTypeName(MT->getCompType()), + Params, !UseTPT)); + std::string ComponentTypeName; + switch (static_cast(CTI)->getZExtIntValue()) { + case internal::InternalJointMatrixCTI::TF32: + ComponentTypeName = "tf32"; + break; + case internal::InternalJointMatrixCTI::Bfloat16: + ComponentTypeName = "bfloat16"; + break; + case 
internal::InternalJointMatrixCTI::PackedInt2: + case internal::InternalJointMatrixCTI::PackedInt4: + // Do nothing just now + break; + default: + llvm_unreachable("Unexpected joint matrix component type"); + } return mapType(T, getSPIRVType(internal::OpTypeJointMatrixINTEL, - transTypeToOCLTypeName(MT->getCompType()), - Params, !UseTPT)); + ComponentTypeName, Params, !UseTPT)); } case OpTypeCooperativeMatrixKHR: { auto *MT = static_cast(T); diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index ac3d805d4d..f1b7b9e8e0 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -700,7 +700,7 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(SPIRVType *ET, unsigned AddrSpc) { // Representation in LLVM IR before the translator is a pointer to an opaque // structure: -// %spirv.JointMatrixINTEL._%element_type%_%rows%_%cols%_%scope%_%use% +// %spirv.JointMatrixINTEL._%element_type%_%rows%_%cols%_%layout%_%scope%_%use% // Here we check the structure name yet again. Another option would be to // check SPIR-V friendly function calls (by their name) and obtain return // or their parameter types, assuming, that the appropriate types are Matrix @@ -711,6 +711,18 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(SPIRVType *ET, unsigned AddrSpc) { // simply not true. 
SPIRVType *LLVMToSPIRVBase::transSPIRVJointMatrixINTELType( SmallVector Postfixes) { + auto ParseInteger = [this](StringRef Postfix) -> ConstantInt * { + unsigned long long N = 0; + if (consumeUnsignedInteger(Postfix, 10, N)) + BM->getErrorLog().checkError( + false, SPIRVEC_InvalidLlvmModule, + "TypeJointMatrixINTEL expects integer parameters"); + return getUInt32(M, N); + }; + std::vector Args; + for (size_t I = 1; I != Postfixes.size(); ++I) + Args.emplace_back(transConstant(ParseInteger(Postfixes[I]))); + Type *ElemTy = nullptr; StringRef Ty{Postfixes[0]}; auto NumBits = llvm::StringSwitch(Ty) @@ -719,32 +731,30 @@ SPIRVType *LLVMToSPIRVBase::transSPIRVJointMatrixINTELType( .Case("int", 32) .Case("long", 64) .Default(0); - if (NumBits) + if (NumBits) { ElemTy = IntegerType::get(M->getContext(), NumBits); - else if (Ty == "half") + } else if (Ty == "half") { ElemTy = Type::getHalfTy(M->getContext()); - else if (Ty == "float") + } else if (Ty == "float") { ElemTy = Type::getFloatTy(M->getContext()); - else if (Ty == "double") + } else if (Ty == "double") { ElemTy = Type::getDoubleTy(M->getContext()); - else if (Ty == "bfloat16") + } else if (Ty == "bfloat16") { ElemTy = Type::getInt16Ty(M->getContext()); - else + // TODO: add BF16 CTI when we do breaking change + // auto *CTI = transConstant(getUInt32(M, static_cast( + // internal::InternalJointMatrixCTI::Bfloat16))); + // Args.push_back(CTI); + // BM->addCapability(internal::CapabilityJointMatrixBF16ComponentTypeINTEL); + } else if (Ty == "tf32") { + ElemTy = Type::getFloatTy(M->getContext()); + auto *CTI = transConstant(getUInt32( + M, static_cast(internal::InternalJointMatrixCTI::TF32))); + Args.push_back(CTI); + BM->addCapability(internal::CapabilityJointMatrixTF32ComponentTypeINTEL); + } else { llvm_unreachable("Unexpected type for matrix!"); - - auto ParseInteger = [this](StringRef Postfix) -> ConstantInt * { - unsigned long long N = 0; - if (consumeUnsignedInteger(Postfix, 10, N)) { - 
BM->getErrorLog().checkError( - false, SPIRVEC_InvalidLlvmModule, - "TypeJointMatrixINTEL expects integer parameters"); - return 0; - } - return getUInt32(M, N); - }; - std::vector Args; - for (size_t I = 1; I != Postfixes.size(); ++I) - Args.emplace_back(transConstant(ParseInteger(Postfixes[I]))); + } return BM->addJointMatrixINTELType(transType(ElemTy), Args); } diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h index c691c4e4f4..4f4727fdfb 100644 --- a/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -207,6 +207,14 @@ template <> inline void SPIRVMap::init() { {CapabilitySubgroupAvcMotionEstimationIntraINTEL}); ADD_VEC_INIT(internal::CapabilityJointMatrixWIInstructionsINTEL, {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixTF32ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixBF16ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixPackedInt2ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); + ADD_VEC_INIT(internal::CapabilityJointMatrixPackedInt4ComponentTypeINTEL, + {internal::CapabilityJointMatrixINTEL}); ADD_VEC_INIT(internal::CapabilityCooperativeMatrixCheckedInstructionsINTEL, {CapabilityCooperativeMatrixKHR}); ADD_VEC_INIT(internal::CapabilityCooperativeMatrixPrefetchINTEL, diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index a0f1e4e1e2..21ddd14c03 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -652,6 +652,14 @@ template <> inline void SPIRVMap::init() { add(internal::CapabilityCacheControlsINTEL, "CacheControlsINTEL"); add(internal::CapabilityJointMatrixWIInstructionsINTEL, "JointMatrixWIInstructionsINTEL"); + add(internal::CapabilityJointMatrixTF32ComponentTypeINTEL, + "JointMatrixTF32ComponentTypeINTEL"); + 
add(internal::CapabilityJointMatrixBF16ComponentTypeINTEL, + "JointMatrixBF16ComponentTypeINTEL"); + add(internal::CapabilityJointMatrixPackedInt2ComponentTypeINTEL, + "JointMatrixPackedInt2ComponentTypeINTEL"); + add(internal::CapabilityJointMatrixPackedInt4ComponentTypeINTEL, + "JointMatrixPackedInt4ComponentTypeINTEL"); add(internal::CapabilityCooperativeMatrixPrefetchINTEL, "CooperativeMatrixPrefetchINTEL"); add(internal::CapabilityCooperativeMatrixCheckedInstructionsINTEL, diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index 9c9325d88b..c82bcf5d67 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -1090,6 +1090,9 @@ class SPIRVTypeJointMatrixINTEL : public SPIRVType { SPIRVValue *getLayout() const { return Args[2]; } SPIRVValue *getScope() const { return Args[3]; } SPIRVValue *getUse() const { return Args.size() > 4 ? Args[4] : nullptr; } + SPIRVValue *getComponentTypeInterpretation() const { + return Args.size() > 5 ? Args[5] : nullptr; + } }; class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { diff --git a/lib/SPIRV/libSPIRV/spirv_internal.hpp b/lib/SPIRV/libSPIRV/spirv_internal.hpp index b1f91405fd..9475e3de3a 100644 --- a/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -113,6 +113,10 @@ enum InternalCapability { ICapabilityTensorFloat32RoundingINTEL = 6425, ICapabilityMaskedGatherScatterINTEL = 6427, ICapabilityJointMatrixWIInstructionsINTEL = 6435, + ICapabilityJointMatrixTF32ComponentTypeINTEL = 6436, + ICapabilityJointMatrixBF16ComponentTypeINTEL = 6437, + ICapabilityJointMatrixPackedInt2ComponentTypeINTEL = 6438, + ICapabilityJointMatrixPackedInt4ComponentTypeINTEL = 6439, ICapabilityCacheControlsINTEL = 6441, ICapRegisterLimitsINTEL = 6460, ICapabilityBindlessImagesINTEL = 6528 @@ -139,6 +143,14 @@ enum InternalJointMatrixLayout { enum InternalJointMatrixUse { MatrixA = 0, MatrixB = 1, Accumulator = 2 }; +enum InternalJointMatrixCTI { + None = 0, + 
TF32 = 1, + Bfloat16 = 2, + PackedInt2 = 3, + PackedInt4 = 4 +}; + enum InternalBuiltIn { IBuiltInSubDeviceIDINTEL = 6135, IBuiltInGlobalHWThreadIDINTEL = 6136, @@ -162,6 +174,10 @@ enum class StoreCacheControlINTEL { #define _SPIRV_OP(x, y) constexpr x x##y = static_cast(I##x##y); _SPIRV_OP(Capability, JointMatrixINTEL) _SPIRV_OP(Capability, JointMatrixWIInstructionsINTEL) +_SPIRV_OP(Capability, JointMatrixTF32ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixBF16ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixPackedInt2ComponentTypeINTEL) +_SPIRV_OP(Capability, JointMatrixPackedInt4ComponentTypeINTEL) _SPIRV_OP(Op, TypeJointMatrixINTEL) _SPIRV_OP(Op, JointMatrixLoadINTEL) _SPIRV_OP(Op, JointMatrixStoreINTEL) diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll new file mode 100644 index 0000000000..04fea81b1b --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll @@ -0,0 +1,204 @@ +; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc + +; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_tensor_float32_conversion,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability TensorFloat32ConversionINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixTF32ComponentTypeINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_tensor_float32_conversion" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeInt [[#TypeInt:]] 32 0 +; CHECK-SPIRV-DAG: Constant [[#TypeInt]] [[#CTI:]] 1 {{$}} +; CHECK-SPIRV-DAG: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV: TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] +; CHECK-SPIRV: 
TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] [[#CTI]] +; CHECK-SPIRV: TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] [[#CTI]] + +; CHECK-LLVM: %spirv.JointMatrixINTEL._float_8_16_3_3_2 = type opaque +; CHECK-LLVM: %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 = type opaque +; CHECK-LLVM: %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 = type opaque + +; ModuleID = 'matrix-tf32-test-sycl-spir64-unknown-unknown.bc' +source_filename = "matrix-tf32-test.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [2 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } +%spirv.JointMatrixINTEL._float_8_16_3_3_2 = type opaque +%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 = type opaque +%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 = type opaque + +$_ZTSZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(float addrspace(1)* noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K, float addrspace(1)* noundef align 4 %_arg_accA, float addrspace(1)* noundef align 4 %_arg_accB, %"class.sycl::_V1::range"* noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB8, %"class.sycl::_V1::id"* 
noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB9) local_unnamed_addr #0 { +entry: + %agg.tmp19.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", %"class.sycl::_V1::range"* %_arg_accB8, i64 0, i32 0, i32 0, i64 1 + %agg.tmp19.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp19.sroa.0.sroa.2.0..sroa_idx, align 8 + %0 = getelementptr inbounds %"class.sycl::_V1::id", %"class.sycl::_V1::id"* %_arg_accB9, i64 0, i32 0, i32 0, i64 0 + %agg.tmp20.sroa.0.sroa.0.0.copyload = load i64, i64* %0, align 8 + %agg.tmp20.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::id", %"class.sycl::_V1::id"* %_arg_accB9, i64 0, i32 0, i32 0, i64 1 + %agg.tmp20.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp20.sroa.0.sroa.2.0..sroa_idx, align 8 + %mul.i4.i.i.i.i67 = mul i64 %agg.tmp20.sroa.0.sroa.0.0.copyload, %agg.tmp19.sroa.0.sroa.2.0.copyload + %add.i6.i.i.i.i68 = add i64 %mul.i4.i.i.i.i67, %agg.tmp20.sroa.0.sroa.2.0.copyload + %add.ptr.i69 = getelementptr inbounds float, float addrspace(1)* %_arg_accB, i64 %add.i6.i.i.i.i68 + %1 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32 + %2 = extractelement <3 x i64> %1, i64 1 + %3 = extractelement <3 x i64> %1, i64 0 + %4 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId, align 32 + %5 = extractelement <3 x i64> %4, i64 1 + %6 = extractelement <3 x i64> %4, i64 0 + %cmp.i.i = icmp ult i64 %2, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i136.i = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %cmp.i136.i) + %cmp.i138.i = icmp ult i64 %5, 2147483648 + tail call void @llvm.assume(i1 %cmp.i138.i) + %sub.i = sub nsw i64 %2, %5 + %cmp.i140.i = icmp ult i64 %6, 2147483648 + tail call void @llvm.assume(i1 %cmp.i140.i) + %sub5.i = sub nsw i64 %3, %6 + %mul.i = shl nsw i64 %sub.i, 3 + %mul8.i = mul i64 %mul.i, %_arg_N + %add.ptr.i.i = getelementptr inbounds float, float addrspace(1)* %_arg_accC, i64 
%mul8.i + %div134.i = and i64 %sub5.i, -16 + %add.ptr.i182.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i, i64 %div134.i + %call.ascast.i.i = addrspacecast float addrspace(1)* %add.ptr.i182.i to float addrspace(4)* + %call1.i.i = tail call spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS2_S4_i(float addrspace(4)* noundef %call.ascast.i.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul17.i = mul i64 %mul.i, %_arg_K + %add.ptr.i194.i = getelementptr inbounds float, float addrspace(1)* %_arg_accA, i64 %mul17.i + %idx.neg.i.i205.i = sub i64 0, %add.i6.i.i.i.i68 + %add.ptr.i.i206334.i = getelementptr float, float addrspace(1)* %add.ptr.i69, i64 %div134.i + %add.ptr.i209333.i = getelementptr float, float addrspace(1)* %add.ptr.i.i206334.i, i64 %idx.neg.i.i205.i + br label %for.cond.i + +for.cond.i: ; preds = %for.cond.cleanup58.i, %entry + %sub_a.sroa.0.0.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ undef, %entry ], [ %sub_a.sroa.0.1.i, %for.cond.cleanup58.i ] + %sub_c.sroa.0.0.i = phi %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* [ %call1.i.i, %entry ], [ %call.i168.i, %for.cond.cleanup58.i ] + %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.cond.cleanup58.i ] + %conv.i = zext i32 %k.0.i to i64 + %cmp.i = icmp ult i64 %conv.i, %_arg_K + br i1 %cmp.i, label %for.body.i, label %for.cond82.i + +for.body.i: ; preds = %for.cond.i + %add.ptr.i197.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i194.i, i64 %conv.i + %call.ascast.i148.i = addrspacecast float addrspace(1)* %add.ptr.i197.i to float addrspace(4)* + %call1.i149.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* 
@_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef %call.ascast.i148.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul26.i = mul i64 %conv.i, %_arg_N + %add.ptr.i212.i = getelementptr float, float addrspace(1)* %add.ptr.i209333.i, i64 %mul26.i + %call.ascast.i155.i = addrspacecast float addrspace(1)* %add.ptr.i212.i to float addrspace(4)* + %call1.i156.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef %call.ascast.i155.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + br label %for.cond30.i + +for.cond30.i: ; preds = %for.body37.i, %for.body.i + %sub_a.sroa.0.1.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ %call1.i149.i, %for.body.i ], [ %call.i225.i, %for.body37.i ] + %i.0.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body37.i ] + %conv31.i = zext i32 %i.0.i to i64 + %call.i215.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i) #3 + %cmp35.i = icmp ugt i64 %call.i215.i, %conv31.i + br i1 %cmp35.i, label %for.body37.i, label %for.cond52.i + +for.body37.i: ; preds = %for.cond30.i + %call.i218.i = tail call spir_func noundef float 
@_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, i64 noundef %conv31.i) #3 + %call.i.i = tail call spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef %call.i218.i) #3 + %call.i225.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, float noundef %call.i.i, i64 noundef %conv31.i) #3 + %inc.i = add nuw nsw i32 %i.0.i, 1 + br label %for.cond30.i + +for.cond52.i: ; preds = %for.cond30.i, %for.body59.i + %sub_b.sroa.0.0.i = phi %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* [ %call.i243.i, %for.body59.i ], [ %call1.i156.i, %for.cond30.i ] + %i51.0.i = phi i32 [ %inc74.i, %for.body59.i ], [ 0, %for.cond30.i ] + %conv53.i = zext i32 %i51.0.i to i64 + %call.i229.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i) #3 + %cmp57.i = icmp ugt i64 %call.i229.i, %conv53.i + br i1 %cmp57.i, label %for.body59.i, label %for.cond.cleanup58.i + +for.cond.cleanup58.i: ; preds = %for.cond52.i + %call.i168.i = tail call spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* 
@_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS9_1ELS9_2ELNS8_12MatrixLayoutE0ELSA_0ELSA_3ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSD_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSD_ISH_XT2_EXT3_EXT8_EXT10_EXT5_EEESG_SC_(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3 + %add.i = add nuw nsw i32 %k.0.i, 16 + br label %for.cond.i + +for.body59.i: ; preds = %for.cond52.i + %call.i236.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, i64 noundef %conv53.i) #3 + %call.i171.i = tail call spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef %call.i236.i) #3 + %call.i243.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, float noundef %call.i171.i, i64 noundef %conv53.i) #3 + %inc74.i = add nuw nsw i32 %i51.0.i, 1 + br label %for.cond52.i + +for.cond82.i: ; preds = %for.cond.i, %for.body87.i + %sub_a.sroa.0.2.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ %call5.i.i, %for.body87.i ], [ %sub_a.sroa.0.0.i, %for.cond.i ] + %i81.0.i = phi i32 [ %inc96.i, %for.body87.i 
], [ 0, %for.cond.i ] + %conv83.i = zext i32 %i81.0.i to i64 + %call.i247.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i) #3 + %cmp85.i = icmp ugt i64 %call.i247.i, %conv83.i + br i1 %cmp85.i, label %for.body87.i, label %_ZZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit + +for.body87.i: ; preds = %for.cond82.i + %call.i269.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i, i64 noundef %conv83.i) #3 + %call.i276.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i, i64 noundef %conv83.i) #3 + %mul.i.i = fmul float %call.i276.i, 2.000000e+00 + %call5.i.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef 
%sub_a.sroa.0.2.i, float noundef %mul.i.i, i64 noundef %conv83.i) #3 + %inc96.i = add nuw nsw i32 %i81.0.i, 1 + br label %for.cond82.i + +_ZZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond82.i + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS2_S4_i(float addrspace(4)* noundef %call.ascast.i.i, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef %sub_c.sroa.0.0.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + call void @__itt_offload_wi_finish_wrapper() + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS2_S4_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* 
@_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, float noundef, i64 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef i64 
@_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, float noundef, i64 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS9_1ELS9_2ELNS8_12MatrixLayoutE0ELSA_0ELSA_3ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSD_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSD_ISH_XT2_EXT3_EXT8_EXT10_EXT5_EEESG_SC_(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent +declare dso_local spir_func void 
@_Z29__spirv_JointMatrixStoreINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS2_S4_i(float addrspace(4)* noundef, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-tf32-test.cpp" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent } From 7a457b6d037e2b224e7ecd97ca13f11d812b0950 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Tue, 28 Mar 2023 21:58:02 +0200 Subject: [PATCH 02/12] [Backport to 16] Rename ConvertFToTF32INTEL to RoundFToTF32INTEL (#1913) Extension name will be preserved for a while for binary compatibility. 
Signed-off-by: Sidorov, Dmitry <dmitry.sidorov@intel.com> (cherry picked from commit 68855f60b26fd630175b4c8696d2a6a7939f21f5) --- .../INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll index 04fea81b1b..5dbb67fb8c 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll @@ -7,7 +7,7 @@ ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc ; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM -; CHECK-SPIRV-DAG: Capability TensorFloat32ConversionINTEL +; CHECK-SPIRV-DAG: Capability TensorFloat32RoundingINTEL ; CHECK-SPIRV-DAG: Capability JointMatrixINTEL ; CHECK-SPIRV-DAG: Capability JointMatrixTF32ComponentTypeINTEL ; CHECK-SPIRV-DAG: Extension "SPV_INTEL_tensor_float32_conversion" ; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" @@ -110,7 +110,7 @@ for.cond30.i: ; preds = %for.body37.i, %for.
for.body37.i: ; preds = %for.cond30.i %call.i218.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, i64 noundef %conv31.i) #3 - %call.i.i = tail call spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef %call.i218.i) #3 + %call.i.i = tail call spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef %call.i218.i) #3 %call.i225.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, float noundef %call.i.i, i64 noundef %conv31.i) #3 %inc.i = add nuw nsw i32 %i.0.i, 1 br label %for.cond30.i @@ -130,7 +130,7 @@ for.cond.cleanup58.i: ; preds = %for.cond52.i for.body59.i: ; preds = %for.cond52.i %call.i236.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, i64 noundef %conv53.i) #3 - %call.i171.i = tail call spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef %call.i236.i) #3 + %call.i171.i = tail call spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef %call.i236.i) #3 %call.i243.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* 
@_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, float noundef %call.i171.i, i64 noundef %conv53.i) #3 %inc74.i = add nuw nsw i32 %i51.0.i, 1 br label %for.cond52.i @@ -173,7 +173,7 @@ declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 ad declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef float @_Z27__spirv_ConvertFToTF32INTELf(float noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef) local_unnamed_addr #2 ; Function Attrs: convergent declare dso_local spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #2 From b6061e3bc7e185a1655b0c9011c52e8fa7cbd086 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 16 Nov 2023 16:45:41 +0100 Subject: [PATCH 03/12] [Backport to 16] Enable BFloat16 and TensorFloat32 conversions for cooperative matrices (#2213) Previously added scalar/vector ConvertFToBF16INTEL, ConvertBF16ToFINTEL and RoundFToTF32INTEL conversions are now enabled for cooperative matrix type under SPV_INTEL_joint_matrix extension 
following the spec: https://github.com/intel/llvm/blob/sycl/sycl/doc/design/spirv-extensions/SPV_INTEL_joint_matrix.asciidoc Note, joint matrices are not allowed as input/output for these conversions as it is being deprecated. Signed-off-by: Sidorov, Dmitry (cherry picked from commit 1010efc8bd9e367597b2c677a0b0df0c14e7b051) --- lib/SPIRV/libSPIRV/SPIRVInstruction.h | 53 ++++++++++++- .../bf16_conversion_instructions.ll | 79 +++++++++++++++++++ .../tf32_conversion_instructions.ll | 53 +++++++++++++ 3 files changed, 183 insertions(+), 2 deletions(-) create mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll create mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index c6fc1bd678..d5842040c7 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -3406,10 +3406,17 @@ template class SPIRVBfloat16ConversionINTELInstBase : public SPIRVUnaryInst { protected: SPIRVCapVec getRequiredCapability() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + return getVec(internal::CapabilityBfloat16ConversionINTEL, + internal::CapabilityJointMatrixBF16ComponentTypeINTEL); return getVec(internal::CapabilityBfloat16ConversionINTEL); } std::optional getRequiredExtension() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + this->getModule()->addExtension(ExtensionID::SPV_INTEL_joint_matrix); return ExtensionID::SPV_INTEL_bfloat16_conversion; } @@ -3438,8 +3445,25 @@ class SPIRVBfloat16ConversionINTELInstBase : public SPIRVUnaryInst { } auto InstName = OpCodeNameMap::map(OC); - SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + auto *Module = this->getModule(); + SPIRVErrorLog &SPVErrLog = Module->getErrorLog(); + // Cooperative matrix type is allowed as 
input/output of the instruction + // if SPV_INTEL_joint_matrix is enabled + if (ResCompTy->isTypeCooperativeMatrixKHR()) { + SPVErrLog.checkError( + Module->isAllowedToUseExtension(ExtensionID::SPV_INTEL_joint_matrix), + SPIRVEC_InvalidInstruction, + InstName + "\nCan be used with " + "cooperative matrices only when SPV_INTEL_joint_matrix is " + "enabled\n"); + assert(InCompTy->isTypeCooperativeMatrixKHR() && + "Input must also be a cooperative matrix"); + ResCompTy = static_cast(ResCompTy) + ->getCompType(); + InCompTy = + static_cast(InCompTy)->getCompType(); + } if (OC == internal::OpConvertFToBF16INTEL) { SPVErrLog.checkError( ResCompTy->isTypeInt(16), SPIRVEC_InvalidInstruction, @@ -3813,10 +3837,17 @@ template class SPIRVTensorFloat32RoundingINTELInstBase : public SPIRVUnaryInst { protected: SPIRVCapVec getRequiredCapability() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + return getVec(internal::CapabilityTensorFloat32RoundingINTEL, + internal::CapabilityJointMatrixTF32ComponentTypeINTEL); return getVec(internal::CapabilityTensorFloat32RoundingINTEL); } std::optional getRequiredExtension() const override { + SPIRVType *ResCompTy = this->getType(); + if (ResCompTy->isTypeCooperativeMatrixKHR()) + this->getModule()->addExtension(ExtensionID::SPV_INTEL_joint_matrix); return ExtensionID::SPV_INTEL_tensor_float32_conversion; } @@ -3845,7 +3876,25 @@ class SPIRVTensorFloat32RoundingINTELInstBase : public SPIRVUnaryInst { } auto InstName = OpCodeNameMap::map(OC); - SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + auto *Module = this->getModule(); + SPIRVErrorLog &SPVErrLog = Module->getErrorLog(); + + // Cooperative matrix type is allowed as input/output of the instruction + // if SPV_INTEL_joint_matrix is enabled + if (ResCompTy->isTypeCooperativeMatrixKHR()) { + SPVErrLog.checkError( + Module->isAllowedToUseExtension(ExtensionID::SPV_INTEL_joint_matrix), + SPIRVEC_InvalidInstruction, + 
InstName + "\nCan be used with " + "cooperative matrices only when SPV_INTEL_joint_matrix is " + "enabled\n"); + assert(InCompTy->isTypeCooperativeMatrixKHR() && + "Input must also be a cooperative matrix"); + ResCompTy = static_cast(ResCompTy) + ->getCompType(); + InCompTy = + static_cast(InCompTy)->getCompType(); + } SPVErrLog.checkError( ResCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll new file mode 100644 index 0000000000..eb1d1afe51 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll @@ -0,0 +1,79 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix,+SPV_INTEL_bfloat16_conversion -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-OCL-IR + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --spirv-target-env=SPV-IR +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR + +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_bfloat16_conversion 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR-NEXT: ConvertFToBF16INTEL +; CHECK-ERROR-NEXT: Can be used with cooperative matrices only when SPV_INTEL_joint_matrix is enabled + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Capability Bfloat16ConversionINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixBF16ComponentTypeINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_bfloat16_conversion" +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: 
TypeInt [[#ShortTy:]] 16 0 +; CHECK-SPIRV-DAG: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#FP32MatTy:]] [[#FP32Ty]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#ShortMatTy:]] [[#ShortTy]] +; CHECK-SPIRV: CompositeConstruct [[#FP32MatTy]] [[#FP32Mat:]] +; CHECK-SPIRV: ConvertFToBF16INTEL [[#ShortMatTy]] [[#]] [[#FP32Mat]] +; CHECK-SPIRV: CompositeConstruct [[#ShortMatTy]] [[#ShortMat:]] +; CHECK-SPIRV: ConvertBF16ToFINTEL [[#FP32MatTy]] [[#]] [[#ShortMat]] + +; CHECK-OCL-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z32intel_convert_bfloat16_as_ushortPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#FP32Matrix]]) +; CHECK-OCL-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z31intel_convert_as_bfloat16_floatPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#ShortMatrix]]) + + +; CHECK-SPV-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#FP32Matrix]]) +; CHECK-SPV-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z27__spirv_ConvertBF16ToFINTELPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#ShortMatrix]]) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_bf() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + ret void +} + +define void @convert_bf_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 17.0.0"} diff --git 
a/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll new file mode 100644 index 0000000000..6392c94138 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll @@ -0,0 +1,53 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix,+SPV_INTEL_tensor_float32_conversion -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_tensor_float32_conversion 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ERROR + +; CHECK-ERROR: InvalidInstruction: Can't translate llvm instruction: +; CHECK-ERROR-NEXT: RoundFToTF32INTEL +; CHECK-ERROR-NEXT: Can be used with cooperative matrices only when SPV_INTEL_joint_matrix is enabled + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Capability TensorFloat32RoundingINTEL +; CHECK-SPIRV-DAG: Capability JointMatrixTF32ComponentTypeINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_tensor_float32_conversion" +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#FP32MatTy:]] [[#FP32Ty]] +; CHECK-SPIRV: CompositeConstruct [[#FP32MatTy]] [[#FP32Mat:]] +; CHECK-SPIRV: RoundFToTF32INTEL [[#FP32MatTy]] [[#]] [[#FP32Mat]] + +; CHECK-LLVM: %[[#Mat:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z25__spirv_RoundFToTF32INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Mat]]) + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_tf() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 17.0.0"} From 1d8c9b144ac6d9fc93d023aaa6536404234d896d Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy <89994100+VyacheslavLevytskyy@users.noreply.github.com> Date: Fri, 17 Nov 2023 20:32:57 +0100 Subject: [PATCH 04/12] [Backport to 16] Add OpCooperativeMatrixApplyFunctionINTEL instruction (#2214) This PR aims to introduce entities related to OpCooperativeMatrixApplyFunctionINTEL in llvm-spirv translator, according to https://github.com/intel/llvm/blob/sycl/sycl/doc/design/spirv-extensions/SPV_INTEL_joint_matrix.asciidoc. 
Co-authored-by: Sidorov, Dmitry (cherry picked from commit 467edf99c59756a7100793d46e5c692654469ede) --- lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 + lib/SPIRV/libSPIRV/SPIRVInstruction.h | 20 +++ lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 2 + lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h | 2 + lib/SPIRV/libSPIRV/spirv_internal.hpp | 5 + .../cooperative_matrix_apply.ll | 149 ++++++++++++++++++ 6 files changed, 180 insertions(+) create mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h index 4f4727fdfb..8bbb3b7bb0 100644 --- a/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -219,6 +219,8 @@ template <> inline void SPIRVMap::init() { {CapabilityCooperativeMatrixKHR}); ADD_VEC_INIT(internal::CapabilityCooperativeMatrixPrefetchINTEL, {CapabilityCooperativeMatrixKHR}); + ADD_VEC_INIT(internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL, + {CapabilityCooperativeMatrixKHR}); } template <> inline void SPIRVMap::init() { diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index d5842040c7..2fa007ae96 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -3556,6 +3556,26 @@ class SPIRVCooperativeMatrixPrefetchINTELInstBase _SPIRV_OP(CooperativeMatrixPrefetch, false, 8, true, 5) #undef _SPIRV_OP +class SPIRVCooperativeMatrixInvocationInstructionsINTELInstBase + : public SPIRVInstTemplateBase { +protected: + std::optional getRequiredExtension() const override { + return ExtensionID::SPV_INTEL_joint_matrix; + } + SPIRVCapVec getRequiredCapability() const override { + return getVec( + internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL); + } +}; + +#define _SPIRV_OP(x, ...) 
\ + typedef SPIRVInstTemplate< \ + SPIRVCooperativeMatrixInvocationInstructionsINTELInstBase, \ + internal::Op##x##INTEL, __VA_ARGS__> \ + SPIRV##x##INTEL; +_SPIRV_OP(CooperativeMatrixApplyFunction, true, 5) +#undef _SPIRV_OP + class SPIRVCooperativeMatrixKHRInstBase : public SPIRVInstTemplateBase { protected: std::optional getRequiredExtension() const override { diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 21ddd14c03..b37434796f 100644 --- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -662,6 +662,8 @@ template <> inline void SPIRVMap::init() { "JointMatrixPackedInt4ComponentTypeINTEL"); add(internal::CapabilityCooperativeMatrixPrefetchINTEL, "CooperativeMatrixPrefetchINTEL"); + add(internal::CapabilityCooperativeMatrixInvocationInstructionsINTEL, + "CooperativeMatrixInvocationInstructionsINTEL"); add(internal::CapabilityCooperativeMatrixCheckedInstructionsINTEL, "CooperativeMatrixCheckedInstructionsINTEL"); add(internal::CapabilityBindlessImagesINTEL, "BindlessImagesINTEL"); diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 9fb2825b1b..30db62d097 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -24,6 +24,8 @@ _SPIRV_OP_INTERNAL(CooperativeMatrixConstructCheckedINTEL, internal::OpCooperativeMatrixConstructCheckedINTEL) _SPIRV_OP_INTERNAL(CooperativeMatrixPrefetchINTEL, internal::OpCooperativeMatrixPrefetchINTEL) +_SPIRV_OP_INTERNAL(CooperativeMatrixApplyFunctionINTEL, + internal::OpCooperativeMatrixApplyFunctionINTEL) _SPIRV_OP_INTERNAL(ComplexFMulINTEL, internal::ComplexFMulINTEL) _SPIRV_OP_INTERNAL(ComplexFDivINTEL, internal::ComplexFDivINTEL) _SPIRV_OP_INTERNAL(MaskedGatherINTEL, internal::OpMaskedGatherINTEL) diff --git a/lib/SPIRV/libSPIRV/spirv_internal.hpp b/lib/SPIRV/libSPIRV/spirv_internal.hpp index 9475e3de3a..3330ae1395 100644 --- 
a/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -79,6 +79,7 @@ enum InternalOp { IOpMaskedGatherINTEL = 6428, IOpMaskedScatterINTEL = 6429, IOpJointMatrixGetElementCoordINTEL = 6440, + IOpCooperativeMatrixApplyFunctionINTEL = 6448, IOpCooperativeMatrixPrefetchINTEL = 6449, IOpConvertHandleToImageINTEL = 6529, IOpConvertHandleToSamplerINTEL = 6530, @@ -113,6 +114,7 @@ enum InternalCapability { ICapabilityTensorFloat32RoundingINTEL = 6425, ICapabilityMaskedGatherScatterINTEL = 6427, ICapabilityJointMatrixWIInstructionsINTEL = 6435, + ICapabilityCooperativeMatrixInvocationInstructionsINTEL = 6435, ICapabilityJointMatrixTF32ComponentTypeINTEL = 6436, ICapabilityJointMatrixBF16ComponentTypeINTEL = 6437, ICapabilityJointMatrixPackedInt2ComponentTypeINTEL = 6438, @@ -195,6 +197,9 @@ _SPIRV_OP(Op, CooperativeMatrixConstructCheckedINTEL) _SPIRV_OP(Capability, CooperativeMatrixPrefetchINTEL) _SPIRV_OP(Op, CooperativeMatrixPrefetchINTEL) +_SPIRV_OP(Capability, CooperativeMatrixInvocationInstructionsINTEL) +_SPIRV_OP(Op, CooperativeMatrixApplyFunctionINTEL) + _SPIRV_OP(Capability, HWThreadQueryINTEL) _SPIRV_OP(BuiltIn, SubDeviceIDINTEL) _SPIRV_OP(BuiltIn, GlobalHWThreadIDINTEL) diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll new file mode 100644 index 0000000000..f85a5f0cc8 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll @@ -0,0 +1,149 @@ +;; compiled from joint_matrix_apply_bf16.cpp from intel/llvm with some modifications + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; 
CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Capability CooperativeMatrixInvocationInstructionsINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy:]] +; CHECK-SPIRV: CompositeConstruct [[#MatTy]] [[#Mat:]] +; CHECK-SPIRV: PtrCastToGeneric [[#]] [[#Ptr:]] [[#]] +; CHECK-SPIRV: CooperativeMatrixApplyFunctionINTEL [[#MatTy]] [[#Apply:]] [[#Ptr]] [[#Mat]] +; CHECK-SPIRV: CooperativeMatrixStoreKHR [[#]] [[#Apply]] + +; CHECK-LLVM: %[[Mat:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @"_Z26__spirv_CompositeConstructP38class.sycl::_V1::ext::oneapi::bfloat16" +; CHECK-LLVM: %[[Apply:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @"_Z43__spirv_CooperativeMatrixApplyFunctionINTELPU3AS477class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapperPU3AS144__spirv_CooperativeMatrixKHR__short_8_16_0_0"(ptr addrspace(4) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) %[[Mat]]) +; CHECK-LLVM: call spir_func void @"_Z33__spirv_CooperativeMatrixStoreKHRPU3AS138class.sycl::_V1::ext::oneapi::bfloat16PU3AS144__spirv_CooperativeMatrixKHR__short_8_16_0_0liii"(ptr addrspace(1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) %[[Apply]], i64 32, i32 0, i32 3, i32 0) + +; ModuleID = 'matrix_apply.bc' +source_filename = "../llvm/sycl/test-e2e/Matrix/joint_matrix_apply_bf16.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [2 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapper" = 
type { ptr addrspace(4) } +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } +%class.anon.0 = type <{ %"class.sycl::_V1::accessor", %class.anon, [7 x i8] }> +%"class.sycl::_V1::accessor" = type { %"class.sycl::_V1::detail::AccessorImplDevice", %union.anon } +%"class.sycl::_V1::detail::AccessorImplDevice" = type { %"class.sycl::_V1::id", %"class.sycl::_V1::range", %"class.sycl::_V1::range" } +%union.anon = type { ptr addrspace(1) } +%class.anon = type { i8 } + +$_ZTSZZ17matrix_verify_addIN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32EZ4mainEUlRS4_E_EvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfOT2_ENKUlRNS1_7handlerEE_clESI_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ17matrix_verify_addIN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32EZ4mainEUlRS4_E_EvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfOT2_ENKUlRNS1_7handlerEE_clESI_EUlNS1_7nd_itemILi2EEEE_(ptr addrspace(1) noundef align 2 %_arg_accA, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accA1, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accA2, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accA3) local_unnamed_addr { +entry: + %ref.tmp.i20 = alloca %"class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapper", align 8 + %agg.tmp.i17 = alloca %"class.sycl::_V1::ext::oneapi::bfloat16", align 2 + %ref.tmp6.i = alloca float, align 4 + %__SYCLKernel = alloca %class.anon.0, align 8 + %__SYCLKernel.ascast = addrspacecast ptr %__SYCLKernel to ptr addrspace(4) + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %__SYCLKernel) + %agg.tmp.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA1, align 8 + 
%agg.tmp.sroa.0.sroa.2.0._arg_accA1.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA1, i64 8 + %agg.tmp.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp.sroa.0.sroa.2.0._arg_accA1.ascast.sroa_idx, align 8 + %agg.tmp5.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA2, align 8 + %agg.tmp5.sroa.0.sroa.2.0._arg_accA2.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA2, i64 8 + %agg.tmp5.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp5.sroa.0.sroa.2.0._arg_accA2.ascast.sroa_idx, align 8 + %agg.tmp6.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accA3, align 8 + %agg.tmp6.sroa.0.sroa.2.0._arg_accA3.ascast.sroa_idx = getelementptr inbounds i8, ptr %_arg_accA3, i64 8 + %agg.tmp6.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp6.sroa.0.sroa.2.0._arg_accA3.ascast.sroa_idx, align 8 + %0 = getelementptr inbounds %"class.sycl::_V1::accessor", ptr %__SYCLKernel, i64 0, i32 1 + store i64 %agg.tmp6.sroa.0.sroa.0.0.copyload, ptr %__SYCLKernel, align 8 + %AccessRange.i.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 1 + store i64 %agg.tmp.sroa.0.sroa.0.0.copyload, ptr %AccessRange.i.i.i.i.i, align 8 + %MemRange.i.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 2 + store i64 %agg.tmp5.sroa.0.sroa.0.0.copyload, ptr %MemRange.i.i.i.i.i, align 8 + %arrayidx.i21.i.i.i.i = getelementptr inbounds [2 x i64], ptr %__SYCLKernel, i64 0, i64 1 + store i64 %agg.tmp6.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i21.i.i.i.i, align 8 + %arrayidx.i25.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 1, i32 0, i32 0, i64 1 + store i64 %agg.tmp.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i25.i.i.i.i, align 8 + %arrayidx.i29.i.i.i.i = getelementptr inbounds %"class.sycl::_V1::detail::AccessorImplDevice", ptr %__SYCLKernel, i64 0, i32 2, i32 0, i32 0, i64 1 + store i64 
%agg.tmp5.sroa.0.sroa.2.0.copyload, ptr %arrayidx.i29.i.i.i.i, align 8 + %mul.i6.i.i.i.i = mul i64 %agg.tmp6.sroa.0.sroa.0.0.copyload, %agg.tmp5.sroa.0.sroa.2.0.copyload + %1 = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accA, i64 %mul.i6.i.i.i.i + %add.ptr.i = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %agg.tmp6.sroa.0.sroa.2.0.copyload + store ptr addrspace(1) %add.ptr.i, ptr %0, align 8 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %4 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8 + %5 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %ref.tmp6.ascast.i = addrspacecast ptr %ref.tmp6.i to ptr addrspace(4) + %cmp.i11 = icmp ult i64 %2, 2147483648 + %cmp.i = icmp ult i64 %3, 2147483648 + %cmp.i15 = icmp ult i64 %4, 2147483648 + %sub.i = sub nsw i64 %2, %4 + %cmp.i12 = icmp ult i64 %5, 2147483648 + %sub5.i = sub nsw i64 %3, %5 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %ref.tmp6.i) + store float 5.000000e+00, ptr %ref.tmp6.i, align 4 + %call.i.i = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) %ref.tmp6.ascast.i) + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %agg.tmp.i17) + store i16 %call.i.i, ptr %agg.tmp.i17, align 2 + %call.i18 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z26__spirv_CompositeConstruct(ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2 %agg.tmp.i17) + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %agg.tmp.i17) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ref.tmp6.i) + %lambda.i = getelementptr inbounds %class.anon.0, ptr addrspace(4) 
%__SYCLKernel.ascast, i64 0, i32 1 + %ref.tmp.ascast.i21 = addrspacecast ptr %ref.tmp.i20 to ptr addrspace(4) + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp.i20) + store ptr addrspace(4) %lambda.i, ptr %ref.tmp.i20, align 8 + %call.i22 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef %call.i18) + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp.i20) + %6 = load ptr addrspace(1), ptr %0, align 8 + %7 = load i64, ptr %__SYCLKernel, align 8 + %8 = load i64, ptr %arrayidx.i29.i.i.i.i, align 8 + %mul.i6.i.i.i.i.i = mul i64 %7, %8 + %9 = load i64, ptr %arrayidx.i21.i.i.i.i, align 8 + %add.i7.i.i.i.i.i = add i64 %mul.i6.i.i.i.i.i, %9 + %idx.neg.i.i = sub i64 0, %add.i7.i.i.i.i.i + %add.ptr.i.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %6, i64 %idx.neg.i.i + %mul12.i = shl nsw i64 %sub.i, 8 + %add.ptr.i43 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i.i, i64 %mul12.i + %div14.i = and i64 %sub5.i, -16 + %add.ptr.i44 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i43, i64 %div14.i + call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef %add.ptr.i44, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef %call.i22, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %__SYCLKernel) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) 
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z26__spirv_CompositeConstruct(ptr noundef byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4)) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8), target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef) local_unnamed_addr + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git)"} From 40cceec819fd5518887eef7a8fdd2c68a2d1cc46 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Thu, 11 May 2023 19:45:29 -0400 Subject: [PATCH 05/12] [Backport to 16] Support the spirv.BufferSurfaceINTEL target extension type (#1995) This target extension type is created here: https://github.com/intel/vc-intrinsics/blob/master/GenXIntrinsics/lib/GenXIntrinsics/GenXSPIRVWriterAdaptor.cpp#L245 As with other target extension types, reverse translation is not yet supported. 
Signed-off-by: Sarnie, Nick Co-authored-by: Victor Mustya (cherry picked from commit 60746d54a5b9af6dd791dfcc90398e06b60c46bc) (cherry picked from commit ac6aa17dc92885d9ecbe65fb7d0fb6f75ace2b9e) --- lib/SPIRV/SPIRVInternal.h | 2 ++ test/transcoding/spirv-target-types-buffer.ll | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 test/transcoding/spirv-target-types-buffer.ll diff --git a/lib/SPIRV/SPIRVInternal.h b/lib/SPIRV/SPIRVInternal.h index ced8c147a6..b00938255e 100644 --- a/lib/SPIRV/SPIRVInternal.h +++ b/lib/SPIRV/SPIRVInternal.h @@ -318,6 +318,7 @@ const static char PipeStorage[] = "PipeStorage"; const static char ConstantPipeStorage[] = "ConstantPipeStorage"; const static char VmeImageINTEL[] = "VmeImageINTEL"; const static char JointMatrixINTEL[] = "JointMatrixINTEL"; +const static char BufferSurfaceINTEL[] = "BufferSurfaceINTEL"; const static char CooperativeMatrixKHR[] = "CooperativeMatrixKHR"; } // namespace kSPIRVTypeName @@ -976,6 +977,7 @@ template <> inline void SPIRVMap::init() { _SPIRV_OP(AvcRefResultINTEL) _SPIRV_OP(AvcSicResultINTEL) _SPIRV_OP(VmeImageINTEL) + _SPIRV_OP(BufferSurfaceINTEL) _SPIRV_OP(CooperativeMatrixKHR) #undef _SPIRV_OP add("JointMatrixINTEL", internal::OpTypeJointMatrixINTEL); diff --git a/test/transcoding/spirv-target-types-buffer.ll b/test/transcoding/spirv-target-types-buffer.ll new file mode 100644 index 0000000000..4de5ad32fb --- /dev/null +++ b/test/transcoding/spirv-target-types-buffer.ll @@ -0,0 +1,23 @@ +; Check translation of the buffer surface target extension type +; +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv -spirv-ext=+SPV_INTEL_vector_compute %t.bc -spirv-text -o %t.spv.txt +; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV +target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir-unknown-unknown" + +; CHECK-SPIRV: Capability VectorComputeINTEL +; CHECK-SPIRV: Extension 
"SPV_INTEL_vector_compute" +; CHECK-SPIRV: Name [[#FuncName:]] "foo" +; CHECK-SPIRV: Name [[#ParamName:]] "a" +; CHECK-SPIRV: TypeVoid [[#VoidT:]] +; CHECK-SPIRV: TypeBufferSurfaceINTEL [[#BufferID:]] +; CHECK-SPIRV: Function [[#VoidT]] [[#FuncID:]] +; CHECK-SPIRV-NEXT: FunctionParameter [[#BufferID]] [[#ParamName]] + +define spir_kernel void @foo(target("spirv.BufferSurfaceINTEL", 0) %a) #0 { + entry: + ret void + } + +attributes #0 = { noinline norecurse nounwind readnone "VCFunction"} \ No newline at end of file From 08a6af3d4379dabfc63883ef23c8ff03e5ab726a Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Sun, 23 Jul 2023 15:00:03 +0200 Subject: [PATCH 06/12] [Backport to 16] [OpaquePointers] Rewrite joint_matrix tests (#2088) This patch adds joint_matrix reverse translation to target extension type and starts rewriting all of the tests. Some tests are being removed as outdated. Remaining tests to add after the patch: 1. tf32 test 2. element-wise operations test Signed-off-by: Sidorov, Dmitry (cherry picked from commit 465eb3c68a1bc0063799ebed2a8c2564d2460bd6) --- lib/SPIRV/SPIRVReader.cpp | 31 +- lib/SPIRV/SPIRVWriter.cpp | 79 +--- .../SPV_INTEL_joint_matrix/joint_matrix.ll | 297 ++++++--------- .../joint_matrix_bfloat16.ll | 356 +++++++++--------- .../joint_matrix_element.ll | 121 ------ ...rix_extract_insert_element_of_sycl_half.ll | 130 ------- .../joint_matrix_half.ll | 304 ++++++++------- .../joint_matrix_tf32.ll | 204 ---------- .../opaque_joint_matrix.ll | 151 -------- .../sycl_2020_namespace.ll | 24 -- 10 files changed, 480 insertions(+), 1217 deletions(-) delete mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll delete mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll delete mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll delete mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll delete mode 100644
test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index e66ad333e0..4ee8151495 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -480,26 +480,17 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { Params.push_back(static_cast(Use)->getZExtIntValue()); auto *CTI = MT->getComponentTypeInterpretation(); if (!CTI) - return mapType(T, getSPIRVType(internal::OpTypeJointMatrixINTEL, - transTypeToOCLTypeName(MT->getCompType()), - Params, !UseTPT)); - std::string ComponentTypeName; - switch (static_cast(CTI)->getZExtIntValue()) { - case internal::InternalJointMatrixCTI::TF32: - ComponentTypeName = "tf32"; - break; - case internal::InternalJointMatrixCTI::Bfloat16: - ComponentTypeName = "bfloat16"; - break; - case internal::InternalJointMatrixCTI::PackedInt2: - case internal::InternalJointMatrixCTI::PackedInt4: - // Do nothing just now - break; - default: - llvm_unreachable("Unexpected joint matrix component type"); - } - return mapType(T, getSPIRVType(internal::OpTypeJointMatrixINTEL, - ComponentTypeName, Params, !UseTPT)); + return mapType( + T, llvm::TargetExtType::get(*Context, "spirv.JointMatrixINTEL", + transType(MT->getCompType()), Params)); + const unsigned CTIValue = + static_cast(CTI)->getZExtIntValue(); + assert(CTIValue <= internal::InternalJointMatrixCTI::PackedInt4 && + "Unknown matrix component type interpretation"); + Params.push_back(CTIValue); + return mapType( + T, llvm::TargetExtType::get(*Context, "spirv.JointMatrixINTEL", + transType(MT->getCompType()), Params)); } case OpTypeCooperativeMatrixKHR: { auto *MT = static_cast(T); diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp index f1b7b9e8e0..268d356bc4 100644 --- a/lib/SPIRV/SPIRVWriter.cpp +++ b/lib/SPIRV/SPIRVWriter.cpp @@ -649,21 +649,6 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(Type *ET, unsigned AddrSpc) { transType(ET))); } } else { - // JointMatrixINTEL type 
is not necessarily an opaque type, it can be - // represented as a structure with pointer to a multidimensional array - // member. - if (ST && ST->hasName()) { - StringRef STName = ST->getName(); - if (STName.startswith(kSPIRVTypeName::PrefixAndDelim)) { - SmallVector Postfixes; - auto TN = decodeSPIRVTypeName(STName, Postfixes); - if (TN == kSPIRVTypeName::JointMatrixINTEL) { - SPIRVType *TranslatedTy = transSPIRVJointMatrixINTELType(Postfixes); - PointeeTypeMap[TypeKey] = TranslatedTy; - return TranslatedTy; - } - } - } SPIRVType *ElementType = transType(ET); // ET, as a recursive type, may contain exactly the same pointer T, so it // may happen that after translation of ET we already have translated T, @@ -698,66 +683,6 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(SPIRVType *ET, unsigned AddrSpc) { return TranslatedTy; } -// Representation in LLVM IR before the translator is a pointer to an opaque -// structure: -// %spirv.JointMatrixINTEL._%element_type%_%rows%_%cols%_%layout%_%scope%_%use% -// Here we check the structure name yet again. Another option would be to -// check SPIR-V friendly function calls (by their name) and obtain return -// or their parameter types, assuming, that the appropriate types are Matrix -// structure type. But in the near future, we will reuse Composite -// instructions to do, for example, matrix initialization directly on AMX -// register by OpCompositeConstruct. And we can't claim, that the Result type -// of OpCompositeConstruct instruction is always the joint matrix type, it's -// simply not true. 
-SPIRVType *LLVMToSPIRVBase::transSPIRVJointMatrixINTELType( - SmallVector Postfixes) { - auto ParseInteger = [this](StringRef Postfix) -> ConstantInt * { - unsigned long long N = 0; - if (consumeUnsignedInteger(Postfix, 10, N)) - BM->getErrorLog().checkError( - false, SPIRVEC_InvalidLlvmModule, - "TypeJointMatrixINTEL expects integer parameters"); - return getUInt32(M, N); - }; - std::vector Args; - for (size_t I = 1; I != Postfixes.size(); ++I) - Args.emplace_back(transConstant(ParseInteger(Postfixes[I]))); - - Type *ElemTy = nullptr; - StringRef Ty{Postfixes[0]}; - auto NumBits = llvm::StringSwitch(Ty) - .Case("char", 8) - .Case("short", 16) - .Case("int", 32) - .Case("long", 64) - .Default(0); - if (NumBits) { - ElemTy = IntegerType::get(M->getContext(), NumBits); - } else if (Ty == "half") { - ElemTy = Type::getHalfTy(M->getContext()); - } else if (Ty == "float") { - ElemTy = Type::getFloatTy(M->getContext()); - } else if (Ty == "double") { - ElemTy = Type::getDoubleTy(M->getContext()); - } else if (Ty == "bfloat16") { - ElemTy = Type::getInt16Ty(M->getContext()); - // TODO: add BF16 CTI when we do breaking change - // auto *CTI = transConstant(getUInt32(M, static_cast( - // internal::InternalJointMatrixCTI::Bfloat16))); - // Args.push_back(CTI); - // BM->addCapability(internal::CapabilityJointMatrixBF16ComponentTypeINTEL); - } else if (Ty == "tf32") { - ElemTy = Type::getFloatTy(M->getContext()); - auto *CTI = transConstant(getUInt32( - M, static_cast(internal::InternalJointMatrixCTI::TF32))); - Args.push_back(CTI); - BM->addCapability(internal::CapabilityJointMatrixTF32ComponentTypeINTEL); - } else { - llvm_unreachable("Unexpected type for matrix!"); - } - return BM->addJointMatrixINTELType(transType(ElemTy), Args); -} - SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, unsigned AddrSpace) { std::pair Key = {STName, AddrSpace}; @@ -814,9 +739,7 @@ SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, return 
SaveType(BM->addQueueType()); else if (TN == kSPIRVTypeName::PipeStorage) return SaveType(BM->addPipeStorageType()); - else if (TN == kSPIRVTypeName::JointMatrixINTEL) { - return SaveType(transSPIRVJointMatrixINTELType(Postfixes)); - } else + else return SaveType( BM->addOpaqueGenericType(SPIRVOpaqueTypeOpCodeMap::map(TN))); } diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll index 72010ed93e..71ea0a8afe 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix.ll @@ -1,207 +1,158 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV: Capability JointMatrixINTEL -; CHECK-SPIRV: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV: Name [[#Kernel:]] "_ZTSZ4mainE11matrix_test" - -; CHECK-SPIRV-DAG: TypeInt [[#ShortTy:]] 16 0 -; CHECK-SPIRV-DAG: TypeInt [[#CharTy:]] 8 0 -; CHECK-SPIRV-DAG: TypeInt [[#IntTy:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Zero:]] 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Two:]] 2 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Three:]] 3 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Sixteen:]] 16 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#FortyTwo:]] 42 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#CTy:]] [[#ShortTy]] [[#Two]] [[#Two]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#ATy:]] [[#CharTy]] [[#Two]] [[#Sixteen]] [[#Zero]] [[#Three]] [[#Zero]] -; 
CHECK-SPIRV: TypeJointMatrixINTEL [[#BTy:]] [[#CharTy]] [[#Sixteen]] [[#Two]] [[#Three]] [[#Three]] - -; CHECK-SPIRV: Function [[#]] [[#Kernel]] -; CHECK-SPIRV: FunctionParameter -; CHECK-SPIRV: FunctionParameter [[#]] [[#Stride:]] - -; CHECK-SPIRV: Label [[#Entry:]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#CTy]] [[#CLoaded:]] [[#Cptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] - -; CHECK-SPIRV: Phi [[#CTy]] [[#C:]] [[#CLoaded]] [[#Entry]] [[#CMad:]] [[#ForBody:]] - -; CHECK-SPIRV: Label [[#ForBody]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#ATy]] [[#A:]] [[#Aptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: JointMatrixLoadINTEL [[#BTy]] [[#B:]] [[#Bptr:]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: JointMatrixMadINTEL [[#CTy]] [[#CMad]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixSUMadINTEL [[#CTy]] [[#UnusedMad1:]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixUSMadINTEL [[#CTy]] [[#UnusedMad2:]] [[#A]] [[#B]] [[#C]] [[#Three]] -; CHECK-SPIRV: JointMatrixUUMadINTEL [[#CTy]] [[#UnusedMad3:]] [[#A]] [[#B]] [[#C]] [[#Three]] - -; CHECK-SPIRV: JointMatrixStoreINTEL [[#Cptr:]] [[#C]] [[#Stride]] [[#Zero]] [[#Three]] [[#Zero]] -; CHECK-SPIRV: CompositeConstruct [[#CTy]] [[#Cnew:]] [[#FortyTwo]] -; CHECK-SPIRV: Store [[#PtrToZero:]] [[#Zero]] -; CHECK-SPIRV: Load [[#]] [[#ZeroLoad:]] [[#PtrToZero]] -; CHECK-SPIRV: CompositeConstruct [[#CTy]] [[#CnewLoad:]] [[#ZeroLoad]] - - -; CHECK-LLVM: %spirv.JointMatrixINTEL._short_2_2_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._char_2_16_0_3_0 -; CHECK-LLVM: %spirv.JointMatrixINTEL._char_16_2_3_3 - -; CHECK-LLVM: [[CLoaded:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z77__spirv_JointMatrixLoadINTEL_RPU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3PU3AS4sliii(i16 addrspace(4)* [[CPtr:%.*]], i64 [[Stride:%.*]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[C:%.*]] = phi %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [ [[CLoaded]], %entry ], [ 
[[CMad:%.*]], %for.body.i ] -; CHECK-LLVM: [[A:%.*]] = call spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* @_Z79__spirv_JointMatrixLoadINTEL_RPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS4cliii(i8 addrspace(4)* [[APtr:%.*]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[B:%.*]] = call spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* @_Z77__spirv_JointMatrixLoadINTEL_RPU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS4cliii(i8 addrspace(4)* [[BPtr:%.*]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: [[CMad1:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z27__spirv_JointMatrixMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad2:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixSUMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad3:%.*]] = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixUSMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) -; CHECK-LLVM: [[CMad4:%.*]] = call spir_func 
%spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z29__spirv_JointMatrixUUMadINTELPU3AS141__spirv_JointMatrixINTEL__char_2_16_0_3_0PU3AS139__spirv_JointMatrixINTEL__char_16_2_3_3PU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3i(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(1)* [[A]], %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(1)* [[B]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i32 3) - -; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS4sPU3AS139__spirv_JointMatrixINTEL__short_2_2_0_3liii(i16 addrspace(4)* [[CPtr]], %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* [[C]], i64 [[Stride]], i32 0, i32 3, i32 0) -; CHECK-LLVM: call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z26__spirv_CompositeConstructi(i32 42) -; CHECK-LLVM: store i32 0, i32 addrspace(4)* [[StoredZero:%.*]], align 4 -; CHECK-LLVM: [[LoadedZero:%.*]] = load i32, i32 addrspace(4)* [[StoredZero]], align 8 -; CHECK-LLVM: call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(1)* @_Z26__spirv_CompositeConstructi(i32 [[LoadedZero]]) - -; ModuleID = 'joint_matrix_test-sycl-spir64-unknown-unknown.bc' -source_filename = "./joint_matrix_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability JointMatrixINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 +; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const12:]] 12 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const0:]] 0 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const48:]] 48 +; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Int32Ty]] [[#Const12]] [[#Const12]] [[#Const3]] [[#Const3]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL 
[[#MatTy2:]] [[#Int8Ty]] [[#Const12]] [[#Const48]] [[#Const0]] [[#Const3]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int8Ty]] [[#Const48]] [[#Const12]] [[#Const2]] [[#Const3]] [[#Const1]] +; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] +; CHECK-SPIRV: JointMatrixLoadINTEL [[#MatTy2]] +; CHECK-SPIRV: JointMatrixLoadINTEL [[#MatTy3]] +; CHECK-SPIRV: JointMatrixMadINTEL [[#MatTy1]] +; CHECK-SPIRV: JointMatrixStoreINTEL + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__char_12_48_0_3_0PU3AS4cliii +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__char_48_12_2_3_1PU3AS4cliii +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS142__spirv_JointMatrixINTEL__char_12_48_0_3_0PU3AS142__spirv_JointMatrixINTEL__char_48_12_2_3_1PU3AS142__spirv_JointMatrixINTEL__uint_12_12_3_3_2i(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) %{{.*}}, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) %{{.*}}, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) +; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS4iPU3AS142__spirv_JointMatrixINTEL__uint_12_12_3_3_2liii(ptr addrspace(4) %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) + +; ModuleID = 'test-matrix-opaque.bc' +source_filename = "matrix-int8-test.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%spirv.JointMatrixINTEL._short_2_2_0_3 = type { [2 x [2 x [1 x [4 x [4 x i16]]]]]* } -%spirv.JointMatrixINTEL._char_2_16_0_3_0 = type { [2 x [16 x [1 x 
[4 x [1 x i8]]]]]* } -%spirv.JointMatrixINTEL._char_16_2_3_3 = type { [16 x [2 x [4 x [4 x i8]]]]* } +%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } +%"class.sycl::_V1::detail::array" = type { [2 x i64] } +%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } -$_ZTSZ4mainE11matrix_test = comdat any +$_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZ4mainE11matrix_test(i16 addrspace(1)* %_arg_, i64 %_arg_1, i8 addrspace(1)* %_arg_3, i8 addrspace(1)* %_arg_5) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 !intel_reqd_sub_group_size !6 { +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !7 - %1 = extractelement <3 x i64> %0, i64 1 - %2 = extractelement <3 x i64> %0, i64 0 - %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId to <3 x i64> 
addrspace(4)*), align 32, !noalias !14 - %4 = extractelement <3 x i64> %3, i64 1 - %5 = extractelement <3 x i64> %3, i64 0 - %cmp.i.i = icmp ult i64 %1, 2147483648 - tail call void @llvm.assume(i1 %cmp.i.i) - %cmp.i45.i = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i45.i) - %cmp.i43.i = icmp ult i64 %4, 2147483648 - tail call void @llvm.assume(i1 %cmp.i43.i) - %sub.i = sub nsw i64 %1, %4 - %cmp.i41.i = icmp ult i64 %5, 2147483648 - tail call void @llvm.assume(i1 %cmp.i41.i) - %sub5.i = sub nsw i64 %2, %5 - %mul6.i = shl nsw i64 %sub.i, 6 - %add.ptr.i51 = getelementptr inbounds i16, i16 addrspace(1)* %_arg_, i64 %mul6.i - %add.ptr7.i52 = getelementptr inbounds i16, i16 addrspace(1)* %add.ptr.i51, i64 %sub5.i - %add.ptr7.i = addrspacecast i16 addrspace(1)* %add.ptr7.i52 to i16 addrspace(4)* - %call8.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i16 addrspace(4)* %add.ptr7.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %add.ptr11.i53 = getelementptr inbounds i8, i8 addrspace(1)* %_arg_3, i64 %mul6.i - %add.ptr16.i55 = getelementptr inbounds i8, i8 addrspace(1)* %_arg_5, i64 %sub5.i + %sub_c.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 + %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 + %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 + %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 + %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %0, align 8 + %agg.tmp16.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, 
i32 0, i32 0, i64 1 + %agg.tmp16.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp16.sroa.0.sroa.2.0..sroa_idx, align 8 + %mul.i4.i.i.i.i45 = mul i64 %agg.tmp16.sroa.0.sroa.0.0.copyload, %agg.tmp15.sroa.0.sroa.2.0.copyload + %add.i6.i.i.i.i46 = add i64 %mul.i4.i.i.i.i45, %agg.tmp16.sroa.0.sroa.2.0.copyload + %add.ptr.i47 = getelementptr inbounds i8, ptr addrspace(1) %_arg_accB, i64 %add.i6.i.i.i.i46 + %1 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 + %2 = extractelement <3 x i64> %1, i64 1 + %3 = extractelement <3 x i64> %1, i64 0 + %4 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 + %5 = extractelement <3 x i64> %4, i64 1 + %6 = extractelement <3 x i64> %4, i64 0 + %cmp.i.i = icmp ult i64 %2, 2147483648 + %cmp.i54.i = icmp ult i64 %3, 2147483648 + %cmp.i56.i = icmp ult i64 %5, 2147483648 + %sub.i = sub nsw i64 %2, %5 + %cmp.i58.i = icmp ult i64 %6, 2147483648 + %sub5.i = sub nsw i64 %3, %6 + %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef 0) #4 + store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %mul.i = mul nsw i64 %sub.i, 12 + %div2452.i = lshr i64 %sub5.i, 4 + %mul26.i = mul i64 %div2452.i, 48 + %div.i = udiv i64 %_arg_K, 48 + %mul11.i = mul i64 %mul.i, %_arg_K + %add.ptr.i93.i = getelementptr inbounds i8, ptr addrspace(1) %_arg_accA, i64 %mul11.i + %idx.neg.i.i104.i = sub i64 0, %add.i6.i.i.i.i46 + %add.ptr.i.i105141.i = getelementptr i8, ptr addrspace(1) %add.ptr.i47, i64 %mul26.i + %mul22.i = shl i64 %_arg_N, 2 + %add.ptr.i108140.i = getelementptr i8, ptr 
addrspace(1) %add.ptr.i.i105141.i, i64 %idx.neg.i.i104.i + %ref.tmp29.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %ref.tmp29.sroa.0.i to ptr + %7 = bitcast ptr %ref.tmp29.sroa.0.i to ptr + %8 = bitcast ptr %sub_c.sroa.0.i to ptr br label %for.cond.i for.cond.i: ; preds = %for.body.i, %entry %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %C.0.i = phi %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* [ %call8.i, %entry ], [ %call19.i, %for.body.i ] - %cmp.i = icmp ult i32 %k.0.i, 32 - br i1 %cmp.i, label %for.body.i, label %_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit + %conv.i = zext i32 %k.0.i to i64 + %cmp.i = icmp ugt i64 %div.i, %conv.i + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit for.body.i: ; preds = %for.cond.i - %idx.ext.i = zext i32 %k.0.i to i64 - %add.ptr12.i54 = getelementptr inbounds i8, i8 addrspace(1)* %add.ptr11.i53, i64 %idx.ext.i - %add.ptr12.i = addrspacecast i8 addrspace(1)* %add.ptr12.i54 to i8 addrspace(4)* - %call13.i = tail call spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm2ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)* %add.ptr12.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %mul14.i = shl nuw nsw i32 %k.0.i, 5 - %idx.ext15.i = zext i32 %mul14.i to i64 - %add.ptr17.i56 = getelementptr inbounds i8, i8 addrspace(1)* %add.ptr16.i55, i64 %idx.ext15.i - %add.ptr17.i = addrspacecast i8 addrspace(1)* %add.ptr17.i56 to i8 addrspace(4)* - %call18.i = tail call spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm16ELm2ELN5__spv12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)* %add.ptr17.i, 
i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %call19.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call20.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixSUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call21.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUSMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %call22.i = tail call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* %call13.i, 
%spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %add.i = add nuw nsw i32 %k.0.i, 16 - br label %for.cond.i, !llvm.loop !19 - -_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(i16 addrspace(4)* %add.ptr7.i, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* %C.0.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %C.0.i.new = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32 42) #1 - %ref.tmp = alloca i32, align 4 - %ref.tmp.ascast = addrspacecast i32* %ref.tmp to i32 addrspace(4)* - store i32 0, i32 addrspace(4)* %ref.tmp.ascast, align 4 - %zero = load i32, i32 addrspace(4)* %ref.tmp.ascast, align 8 - %C.0.i.new.load = call spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32 %zero) #1 + %mul12.i = mul nsw i32 %k.0.i, 48 + %conv13.i = zext i32 %mul12.i to i64 + %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i + %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 + %div20.i = mul nsw i32 %k.0.i, 12 + %conv21.i = zext i32 %div20.i to i64 + %mul23.i = mul i64 %mul22.i, %conv21.i + %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i + %call.ascast.i72.i = addrspacecast ptr addrspace(1) 
%add.ptr.i111.i to ptr addrspace(4) + %call1.i73.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef %call1.i.i, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef %call1.i73.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 3) #4 + store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 + store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i 
+_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond.i + %mul37.i = mul i64 %mul.i, %_arg_N + %add.ptr.i.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_accC, i64 %mul37.i + %mul39.i = mul nuw i64 %div2452.i, 12 + %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i + %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) ret void } ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i16 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm2ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent 
-declare dso_local spir_func %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIaLm16ELm2ELN5__spv12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(i8 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixSUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 
noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUSMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z29__spirv_JointMatrixUUMadINTELIasLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS1_3ELS1_0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS4_IT_XT1_EXT2_EXT4_EXT7_EEEPNS4_IS8_XT2_EXT3_EXT5_EXT7_EEES7_S3_(%spirv.JointMatrixINTEL._char_2_16_0_3_0 addrspace(4)*, %spirv.JointMatrixINTEL._char_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef, 
target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIsLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(i16 addrspace(4)*, %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._short_2_2_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructi(i32) #1 - -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 noundef) #2 - -attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="./joint_matrix_test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #3 = { convergent } - -!llvm.module.flags = !{!0, !1} -!opencl.spir.version = !{!2} -!spirv.Source = !{!3} -!llvm.ident = !{!4} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"frame-pointer", i32 2} -!2 = !{i32 1, i32 2} -!3 = !{i32 4, i32 100000} -!4 = !{!"clang version 13.0.0 (https://github.com/intel/llvm.git b3243d9f711a1cd80681530d6017324796668d51)"} -!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!6 = !{i32 16} -!7 = !{!8, !10, !12} -!8 = distinct !{!8, !9, 
!"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!9 = distinct !{!9, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!10 = distinct !{!10, !11, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!11 = distinct !{!11, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!12 = distinct !{!12, !13, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!13 = distinct !{!13, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!14 = !{!15, !17, !12} -!15 = distinct !{!15, !16, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!16 = distinct !{!16, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!17 = distinct !{!17, !18, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!18 = distinct !{!18, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!19 = distinct !{!19, !20, !21} -!20 = !{!"llvm.loop.mustprogress"} -!21 = !{!"llvm.loop.unroll.disable"} +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 +attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-int8-test.cpp" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 
+attributes #4 = { convergent } diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll index 8109cb0ed1..80c014b689 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_bfloat16.ll @@ -1,218 +1,202 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc +; compiled from joint_matrix_bfloat16.cpp test from intel/llvm -; RUN: llvm-spirv -s %t.bc -opaque-pointers=0 -o %t.regularized.bc -; RUN: llvm-dis -opaque-pointers=0 %t.regularized.bc -o %t.regularized.ll -; RUN: FileCheck < %t.regularized.ll %s --check-prefix=CHECK-REGULARIZED +; RUN: llvm-as < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv ; RUN: llvm-spirv %t.spv -to-text -o %t.spt ; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-REGULARIZED: %[[Alloca:.*]] = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 -; CHECK-REGULARIZED: %[[ASCast:.*]] = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %[[Alloca]] to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* -; CHECK-REGULARIZED: %[[GEP1:.*]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %[[ASCast]], i64 0, i32 0 -; CHECK-REGULARIZED: %[[#Extract:]] = call spir_func i16 
@_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* align 2 %{{.*}}, i64 noundef %{{.*}}) -; CHECK-REGULARIZED: %[[#GEP2:]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %[[ASCast]], i32 0, i32 0 -; CHECK-REGULARIZED: store i16 %[[#Extract]], i16 addrspace(4)* %[[#GEP2]], align 2 -; CHECK-REGULARIZED: %[[#Load:]] = load i16, i16 addrspace(4)* %[[GEP1]], align 2 -; CHECK-REGULARIZED: %[[ConvertVal:.*]] = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %[[#Load]]) -; CHECK-REGULARIZED: %{{.*}} = fadd float %[[ConvertVal]], %{{.*}} - -; CHECK-SPIRV: TypeInt [[#TypeI16ID:]] 16 0 -; CHECK-SPIRV: TypeFloat [[#TypeFID:]] 32 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#TypeJointMID:]] [[#TypeI16ID]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: Phi [[#TypeJointMID]] [[#PhiID:]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: VectorExtractDynamic [[#TypeI16ID]] [[#ExtractID:]] [[#PhiID]] [[#]] -; CHECK-SPIRV: Store [[#PtrID:]] [[#ExtractID]] [[#]] [[#]] -; CHECK-SPIRV: ConvertBF16ToFINTEL [[#TypeFID]] [[#Conv1ID:]] [[#]] -; CHECK-SPIRV: ConvertBF16ToFINTEL [[#TypeFID]] [[#Conv2ID:]] [[#]] -; CHECK-SPIRV: FAdd [[#TypeFID]] [[#ResId:]] [[#Conv1ID]] [[#Conv2ID]] -; CHECK-SPIRV: ConvertFToBF16INTEL [[#TypeI16ID]] [[#]] [[#ResId]] -; CHECK-SPIRV: Load [[#TypeI16ID]] [[#LoadID:]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: VectorInsertDynamic [[#TypeJointMID]] [[#]] [[#PhiID]] [[#LoadID]] [[#]] - -; CHECK-LLVM: %spirv.JointMatrixINTEL._short_8_16_0_3 -; CHECK-LLVM: %[[GEP1:.*]] = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %{{.*}}, i64 0, i32 0 -; CHECK-LLVM: %[[GEP2:.*]] = 
getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %{{.*}}, i64 0, i32 0 -; CHECK-LLVM: %[[ConvertConst:.*]] = call spir_func i16 @_Z32intel_convert_bfloat16_as_ushortf(float 2.000000e+00) -; CHECK-LLVM: %[[#LoadGEP:]] = load i16, i16 addrspace(4)* %[[GEP2]], align 2 -; CHECK-LLVM: %[[ConvertVal:.*]] = call spir_func float @_Z31intel_convert_as_bfloat16_floats(i16 %[[#LoadGEP]]) -; CHECK-LLVM: %[[ConvertConstToF:.*]] = call spir_func float @_Z31intel_convert_as_bfloat16_floats(i16 %[[ConvertConst]]) -; CHECK-LLVM: %[[FAddRes:.*]] = fadd float %[[ConvertVal]], %[[ConvertConstToF]] -; CHECK-LLVM: %[[ConvertResToBF:.*]] = call spir_func i16 @_Z32intel_convert_bfloat16_as_ushortf(float %[[FAddRes]]) -; CHECK-LLVM: store i16 %[[ConvertResToBF]], i16 addrspace(4)* %[[#]], align 2 - -; ModuleID = 'joint_matrix_bfloat16_test.bc' -source_filename = "joint_matrix_bfloat16_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: TypeInt [[#SHORT:]] 16 +; CHECK-SPIRV-DAG: TypeInt [[#INT:]] 32 +; CHECK-SPIRV-DAG: TypeFloat [[#Float:]] 32 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST8:]] 8 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST16:]] 16 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST3:]] 3 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST1:]] 1 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST0:]] 0 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Float]] [[#CONST8]] [[#CONST16]] [[#CONST3]] [[#CONST3]] [[#CONST2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#SHORT]] [[#CONST8]] [[#CONST16]] [[#CONST0]] [[#CONST3]] [[#CONST0]] [[#CONST1]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#SHORT]] [[#CONST16]] [[#CONST16]] [[#CONST2]] [[#CONST3]] [[#CONST1]] [[#CONST1]] + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) 
@_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2PU3AS1fliii(ptr addrspace(1) %{{.*}}, i64 32, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @"_Z82__spirv_JointMatrixLoadINTEL_RPU3AS144__spirv_JointMatrixINTEL__short_8_16_0_3_0_1PU3AS138class.sycl::_V1::ext::oneapi::bfloat16liii"(ptr addrspace(1) %{{.*}}, i64 32, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @"_Z83__spirv_JointMatrixLoadINTEL_RPU3AS145__spirv_JointMatrixINTEL__short_16_16_2_3_1_1PU3AS138class.sycl::_V1::ext::oneapi::bfloat16liii"(ptr addrspace(1) %{{.*}}, i64 64, i32 2, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS144__spirv_JointMatrixINTEL__short_8_16_0_3_0_1PU3AS145__spirv_JointMatrixINTEL__short_16_16_2_3_1_1PU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2i(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) %{{.*}}, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i32 3) +; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2liii(ptr addrspace(1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i64 32, i32 0, i32 3, i32 0) + +; ModuleID = 'joint_matrix_bfloat16-sycl-spir64-unknown-unknown.bc' +source_filename = "../joint_matrix_bfloat16.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%class.anon = type { %"class.cl::sycl::accessor" } -%"class.cl::sycl::accessor" = type { %"class.cl::sycl::detail::AccessorImplDevice", %union.anon } -%"class.cl::sycl::detail::AccessorImplDevice" = type { %"class.cl::sycl::id", %"class.cl::sycl::range", 
%"class.cl::sycl::range" } -%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } -%"class.cl::sycl::detail::array" = type { [2 x i64] } -%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } -%union.anon = type { %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* } -%"class.cl::sycl::ext::intel::experimental::bfloat16" = type { i16 } -%"class.cl::sycl::nd_item" = type { %"class.cl::sycl::item", %"class.cl::sycl::item.0", %"class.cl::sycl::group" } -%"class.cl::sycl::item" = type { %"struct.cl::sycl::detail::ItemBase" } -%"struct.cl::sycl::detail::ItemBase" = type { %"class.cl::sycl::range", %"class.cl::sycl::id", %"class.cl::sycl::id" } -%"class.cl::sycl::item.0" = type { %"struct.cl::sycl::detail::ItemBase.1" } -%"struct.cl::sycl::detail::ItemBase.1" = type { %"class.cl::sycl::range", %"class.cl::sycl::id" } -%"class.cl::sycl::group" = type { %"class.cl::sycl::range", %"class.cl::sycl::range", %"class.cl::sycl::range", %"class.cl::sycl::id" } -%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 = type opaque - -$_ZZZ17matrix_verify_addIN2cl4sycl3ext5intel12experimental8bfloat16ELm16ELm16EEvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_ = comdat any - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #0 - -; Function Attrs: convergent inlinehint norecurse -define linkonce_odr dso_local spir_func void @_ZZZ17matrix_verify_addIN2cl4sycl3ext5intel12experimental8bfloat16ELm16ELm16EEvNS1_5queueER10big_matrixIT_XT0_EXT1_EERNS1_8nd_rangeILi2EEEfENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_(%class.anon addrspace(4)* noundef align 8 dereferenceable_or_null(56) %this, %"class.cl::sycl::nd_item"* noundef byval(%"class.cl::sycl::nd_item") align 8 %spmd_item) local_unnamed_addr #1 comdat align 2 { +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + 
+$_ZTSZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_E7imatrix = comdat any + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_E7imatrix(ptr addrspace(1) noundef align 4 %_arg_accC, ptr addrspace(1) noundef align 2 %_arg_accA, ptr addrspace(1) noundef align 2 %_arg_accB) local_unnamed_addr #0 comdat !srcloc !48 !kernel_arg_buffer_location !49 !kernel_arg_runtime_aligned !50 !kernel_arg_exclusive_ptr !50 !intel_reqd_sub_group_size !51 !sycl_fixed_targets !52 !sycl_kernel_omit_args !53 { entry: - %ref.tmp.i = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %agg.tmp.i54 = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %agg.tmp.i = alloca %"class.cl::sycl::ext::intel::experimental::bfloat16", align 2 - %spmd_item.ascast = addrspacecast %"class.cl::sycl::nd_item"* %spmd_item to %"class.cl::sycl::nd_item" addrspace(4)* - %arrayidx.i.i.i = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 0 - %0 = load i64, i64 addrspace(4)* %arrayidx.i.i.i, align 8, !tbaa !5 - %cmp.i = icmp ult i64 %0, 2147483648 - tail call void @llvm.assume(i1 %cmp.i) - %arrayidx.i.i.i29 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 1 - %1 = load i64, i64 
addrspace(4)* %arrayidx.i.i.i29, align 8, !tbaa !5 - %cmp.i30 = icmp ult i64 %1, 2147483648 - tail call void @llvm.assume(i1 %cmp.i30) - %arrayidx.i.i.i31 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 1, i32 0, i32 1, i32 0, i32 0, i64 0 - %2 = load i64, i64 addrspace(4)* %arrayidx.i.i.i31, align 8, !tbaa !5 - %cmp.i32 = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i32) - %arrayidx.i.i.i33 = getelementptr inbounds %"class.cl::sycl::nd_item", %"class.cl::sycl::nd_item" addrspace(4)* %spmd_item.ascast, i64 0, i32 1, i32 0, i32 1, i32 0, i32 0, i64 1 - %3 = load i64, i64 addrspace(4)* %arrayidx.i.i.i33, align 8, !tbaa !5 - %cmp.i34 = icmp ult i64 %3, 2147483648 - tail call void @llvm.assume(i1 %cmp.i34) - %4 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i to i8* - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %4) - %agg.tmp.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - %call.i.i.i = tail call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef 5.000000e+00) #6 - %value.i.i = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %agg.tmp.ascast.i, i64 0, i32 0 - store i16 %call.i.i.i, i16 addrspace(4)* %value.i.i, align 2, !tbaa !9 - %call.i = tail call spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESB_(%"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef nonnull byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %agg.tmp.i) #7 - call void @llvm.lifetime.end.p0i8(i64 
2, i8* nonnull %4) - %ref.tmp.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %ref.tmp.i to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - %5 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %ref.tmp.i to i8* - %value.i.i.i = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* %ref.tmp.ascast.i, i64 0, i32 0 - %6 = bitcast %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i54 to i8* - %7 = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16"* %agg.tmp.i54, i64 0, i32 0 - %8 = addrspacecast i16* %7 to i16 addrspace(4)* - br label %for.cond - -for.cond: ; preds = %for.body, %entry - %sub_a.sroa.0.0 = phi %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* [ %call.i, %entry ], [ %call.i58, %for.body ] - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %conv = zext i32 %i.0 to i64 - %call.i41 = call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEmPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0) #7 - %cmp = icmp ugt i64 %call.i41, %conv - br i1 %cmp, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond - %sub5 = sub nsw i64 %1, %3 - %sub = sub nsw i64 %0, %2 - %MData.i.i.i = getelementptr inbounds %class.anon, %class.anon addrspace(4)* %this, i64 0, i32 0, i32 1, i32 0 - %9 = load %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)*, %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* addrspace(4)* %MData.i.i.i, align 8, !tbaa !12, !noalias !13 - %mul19 = shl nsw i64 %sub, 7 - %add.ptr.i = getelementptr inbounds 
%"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %9, i64 %mul19 - %div = and i64 %sub5, -8 - %add.ptr.i45 = getelementptr inbounds %"class.cl::sycl::ext::intel::experimental::bfloat16", %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %add.ptr.i, i64 %div - %call.ascast.i = addrspacecast %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(1)* %add.ptr.i45 to %"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* - call spir_func void @_Z29__spirv_JointMatrixStoreINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEvPT_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEmS7_S9_i(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* noundef %call.ascast.i, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0, i64 noundef 16, i32 noundef 0, i32 noundef 3, i32 noundef 0) #7 + call void @__itt_offload_wi_start_wrapper() + %0 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8, !noalias !54 + %1 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !54 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !61 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !61 + %cmp.i.i = icmp ult i64 %0, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i50.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i50.i) + %cmp.i52.i = icmp ult i64 %2, 2147483648 + tail call void @llvm.assume(i1 %cmp.i52.i) + %sub.i = sub nsw i64 %0, %2 + %cmp.i55.i = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %cmp.i55.i) + %sub5.i = sub nsw i64 %1, %3 + %mul8.i = shl nsw i64 %sub.i, 8 + %add.ptr.i.i = getelementptr inbounds 
float, ptr addrspace(1) %_arg_accC, i64 %mul8.i + %div48.i = and i64 %sub5.i, -16 + %add.ptr.i69.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %div48.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef %add.ptr.i69.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul28.i = shl nsw i64 %div48.i, 1 + %add.ptr.i84.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accA, i64 %mul8.i + %invariant.gep = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %_arg_accB, i64 %mul28.i + br label %for.cond.i + +for.cond.i: ; preds = %for.body.i, %entry + %sub_c.sroa.0.0.i = phi target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) [ %call1.i.i, %entry ], [ %call.i.i, %for.body.i ] + %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] + %cmp.i = icmp ult i32 %k.0.i, 2 + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit + +for.body.i: ; preds = %for.cond.i + %mul16.i = shl nuw nsw i32 %k.0.i, 4 + %conv17.i = zext i32 %mul16.i to i64 + %add.ptr.i85.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i84.i, i64 %conv17.i + %call1.i63.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef 
%add.ptr.i85.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %div23.i = shl nuw nsw i32 %k.0.i, 3 + %conv24.i = zext i32 %div23.i to i64 + %mul25.i = shl nuw nsw i64 %conv24.i, 6 + %gep = getelementptr %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul25.i + %call1.i67.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 64, i32 noundef 2, i32 noundef 3, i32 noundef 0) #3 + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef %call1.i63.i, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef %call1.i67.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3, !noalias !66 + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i, !llvm.loop !69 + +_ZZZ15matrix_multiplyIfN4sycl3_V13ext6oneapi8bfloat16ELm16ELm32ELm32EEvR10big_matrixIT_XT1_EXT2_EERS5_IT0_XT1_EXT3_EERS5_IS9_XdvT3_Li2EEXmlT2_Li2EEEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit: ; preds = %for.cond.i + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i69.i, 
target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + call void @__itt_offload_wi_finish_wrapper() ret void - -for.body: ; preds = %for.cond - %call.i.i = call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef 2.000000e+00) #6 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %5) #8, !noalias !16 - call spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* sret(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %ref.tmp.ascast.i, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef %sub_a.sroa.0.0, i64 noundef %conv) #7, !noalias !16 - %10 = load i16, i16 addrspace(4)* %value.i.i.i, align 2, !tbaa !19, !noalias !20 - %call.i.i.i.i = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %10) #6, !noalias !20 - %call.i.i3.i.i = call spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext %call.i.i) #6, !noalias !20 - %add.i.i = fadd float %call.i.i.i.i, %call.i.i3.i.i - %call.i.i4.i.i = call spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef %add.i.i) #6, !noalias !20 - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %5) #8, !noalias !16 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %6) - store i16 %call.i.i4.i.i, i16 addrspace(4)* %8, align 2, !tbaa !19 - %call.i58 = call spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESD_SB_m(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef 
%sub_a.sroa.0.0, %"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef nonnull byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2 %agg.tmp.i54, i64 noundef %conv) #7 - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %6) - %inc = add nuw nsw i32 %i.0, 1 - br label %for.cond, !llvm.loop !23 } -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #0 - -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 noundef) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z26__spirv_CompositeConstructIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESB_(%"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2) local_unnamed_addr #4 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEmPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef) local_unnamed_addr #4 +; Function Attrs: convergent nounwind 
+declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EET_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* sret(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #4 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef float @_Z27__spirv_ConvertBF16ToFINTELt(i16 noundef zeroext) local_unnamed_addr #5 +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef, 
target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef zeroext i16 @_Z27__spirv_ConvertFToBF16INTELf(float noundef) local_unnamed_addr #5 +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESD_SB_m(%spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, %"class.cl::sycl::ext::intel::experimental::bfloat16"* noundef byval(%"class.cl::sycl::ext::intel::experimental::bfloat16") align 2, i64 noundef) local_unnamed_addr #4 +declare void @__itt_offload_wi_start_wrapper() -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIN2cl4sycl3ext5intel12experimental8bfloat16ELm8ELm16ELN5__spv12MatrixLayoutE0ELNS6_5Scope4FlagE3EEvPT_PNS6_24__spirv_JointMatrixINTELISA_XT0_EXT1_EXT2_EXT3_EEEmS7_S9_i(%"class.cl::sycl::ext::intel::experimental::bfloat16" addrspace(4)* noundef, %spirv.JointMatrixINTEL._bfloat16_8_16_0_3 addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #4 +declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { argmemonly nofree nosync nounwind willreturn 
} -attributes #1 = { convergent inlinehint norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { argmemonly nofree nounwind willreturn writeonly } -attributes #3 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #4 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #5 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #6 = { convergent nounwind } -attributes #7 = { convergent } -attributes #8 = { nounwind } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../joint_matrix_bfloat16.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent nounwind } !llvm.module.flags = !{!0, !1} !opencl.spir.version = !{!2} !spirv.Source = !{!3} -!llvm.ident = !{!4} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46} +!llvm.ident = !{!47} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, i32 2} !3 = !{i32 4, i32 100000} -!4 = !{!"clang version 15.0.0 (https://github.com/pauzinl/llvm.git fb27c655023f19ff91f09413a0c51f0a37071cff)"} -!5 = !{!6, !6, i64 0} -!6 = !{!"long", !7, i64 0} -!7 = !{!"omnipotent char", !8, i64 0} -!8 = !{!"Simple C++ TBAA"} -!9 = !{!10, !11, i64 0} -!10 = !{!"_ZTSN2cl4sycl3ext5intel12experimental8bfloat16E", !11, i64 0} -!11 = !{!"short", !7, i64 0} -!12 = !{!7, !7, 
i64 0} -!13 = !{!14} -!14 = distinct !{!14, !15, !"_ZNK2cl4sycl8accessorINS0_3ext5intel12experimental8bfloat16ELi2ELNS0_6access4modeE1026ELNS6_6targetE2014ELNS6_11placeholderE0ENS2_6oneapi22accessor_property_listIJEEEE11get_pointerILS8_2014EvEENS0_9multi_ptrIS5_LNS6_13address_spaceE1EEEv: %agg.result"} -!15 = distinct !{!15, !"_ZNK2cl4sycl8accessorINS0_3ext5intel12experimental8bfloat16ELi2ELNS0_6access4modeE1026ELNS6_6targetE2014ELNS6_11placeholderE0ENS2_6oneapi22accessor_property_listIJEEEE11get_pointerILS8_2014EvEENS0_9multi_ptrIS5_LNS6_13address_spaceE1EEEv"} -!16 = !{!17} -!17 = distinct !{!17, !18, !"_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS1_5intel12experimental8bfloat16ELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_: %agg.result"} -!18 = distinct !{!18, !"_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS1_5intel12experimental8bfloat16ELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_"} -!19 = !{!11, !11, i64 0} -!20 = !{!21, !17} -!21 = distinct !{!21, !22, !"_ZN2cl4sycl3ext5intel12experimentalplERKNS3_8bfloat16ES6_: %agg.result"} -!22 = distinct !{!22, !"_ZN2cl4sycl3ext5intel12experimentalplERKNS3_8bfloat16ES6_"} -!23 = distinct !{!23, !24} -!24 = !{!"llvm.loop.mustprogress"} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = 
!{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"int64_base_atomics", i32 7} +!43 = !{!"int64_extended_atomics", i32 8} +!44 = !{!"usm_system_allocator", i32 17} +!45 = !{!"usm_restricted_shared_allocations", i32 16} +!46 = !{!"host", i32 0} +!47 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!48 = !{i32 10642943} +!49 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!50 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false} +!51 = !{i32 16} +!52 = !{} +!53 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true} +!54 = !{!55, !57, !59} +!55 = distinct !{!55, !56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!56 = distinct !{!56, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!57 = distinct !{!57, !58, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!58 = distinct !{!58, 
!"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!59 = distinct !{!59, !60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!60 = distinct !{!60, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!61 = !{!62, !64, !59} +!62 = distinct !{!62, !63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!63 = distinct !{!63, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!64 = distinct !{!64, !65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!66 = !{!67} +!67 = distinct !{!67, !68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!68 = distinct !{!68, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!69 = distinct !{!69, !70} +!70 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll deleted file mode 100644 index 9ddff0fc2e..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_element.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+all -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o 
%t.rev.bc -; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: Capability JointMatrixINTEL -; CHECK-SPIRV-DAG: Capability JointMatrixWIInstructionsINTEL -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV-DAG: TypeInt [[#TypeInt32:]] 32 -; CHECK-SPIRV-DAG: TypeInt [[#TypeInt64:]] 64 -; CHECK-SPIRV-DAG: TypeFloat [[#TypeFloat:]] 32 -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#TypeMatrix:]] [[#TypeFloat]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV-DAG: TypeVector [[#TypeVec:]] [[#TypeInt32]] 2 -; CHECK-SPIRV: Phi [[#TypeMatrix]] [[#Matrix:]] -; CHECK-SPIRV: JointMatrixWorkItemLengthINTEL [[#TypeInt64]] [[#]] [[#Matrix]] -; CHECK-SPIRV: VectorExtractDynamic [[#TypeFloat]] [[#]] [[#Matrix]] [[#Index:]] -; CHECK-SPIRV: FMul [[#TypeFloat]] [[#NewVal:]] [[#]] [[#]] -; CHECK-SPIRV: VectorInsertDynamic [[#TypeMatrix]] [[#]] [[#Matrix]] [[#NewVal]] [[#Index]] -; CHECK-SPIRV: JointMatrixGetElementCoordINTEL [[#TypeVec]] [[#]] [[#Matrix]] [[#Index]] - -; CHECK-LLVM: [[Length:%.*]] = call spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3(ptr addrspace(1) [[Matrix:%.*]]) -; CHECK-LLVM: [[Elem:%.*]] = call spir_func float @_Z28__spirv_VectorExtractDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3l(ptr addrspace(1) [[Matrix]], i64 [[Index:%.*]]) -; CHECK-LLVM: [[NewVal:%.*]] = fmul float [[Elem]], 5.000000e+00 -; CHECK-LLVM: {{%.*}} = call spir_func ptr addrspace(1) @_Z27__spirv_VectorInsertDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3fl(ptr addrspace(1) [[Matrix]], float [[NewVal]], i64 [[Index]]) -; CHECK-LLVM: {{%.*}} = call spir_func <2 x i32> @_Z39__spirv_JointMatrixGetElementCoordINTELPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3l(ptr addrspace(1) [[Matrix]], i64 [[Index]]) - -source_filename = "/work/tmp/matrix-slice.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" 
-target triple = "spir64-unknown-unknown" - -%"struct.cl::sycl::detail::AssertHappened" = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 } -%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" } -%"class.cl::sycl::detail::array" = type { [1 x i64] } -%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" } -%spirv.JointMatrixINTEL._float_16_16_0_3 = type opaque - -$_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE = comdat any - -$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix = comdat any - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE(%"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_1, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_2, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 { -entry: - %0 = getelementptr inbounds %"class.cl::sycl::id", %"class.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds %"struct.cl::sycl::detail::AssertHappened", %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, i64 %2 - %3 = bitcast %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %add.ptr.i to i8 addrspace(1)* - %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)* - tail call spir_func void @__devicelib_assert_read(i8 addrspace(4)* %4) #2 - ret void -} - -; Function Attrs: convergent -declare extern_weak dso_local spir_func void @__devicelib_assert_read(i8 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix() local_unnamed_addr 
#0 comdat !kernel_arg_buffer_location !6 { -entry: - %call9.i.i = tail call spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), i64 1, i32 0, i32 3, i32 0) #2 - br label %for.cond.i - -for.cond.i: ; preds = %for.body.i, %entry - %A.sroa.0.0.i = phi %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* [ %call9.i.i, %entry ], [ %call5.i.i, %for.body.i ] - %i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] - %conv.i = zext i32 %i.0.i to i64 - %call.i12.i = tail call spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i) #2 - %cmp.i = icmp ugt i64 %call.i12.i, %conv.i - br i1 %cmp.i, label %for.body.i, label %_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit - -for.body.i: ; preds = %for.cond.i - %call.i.i = tail call spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 %conv.i) #2 - %mul.i.i = fmul float %call.i.i, 5.000000e+00 - %call5.i.i = tail call spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, float %mul.i.i, i64 %conv.i) #2 - %call6 = tail call spir_func <2 x i32> 
@_Z39__spirv_JointMatrixGetElementCoordINTELIaLm8ELm32ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEDv2_jPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEm(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 %conv.i) #2 - %inc.i = add nuw nsw i32 %i.0.i, 1 - br label %for.cond.i, !llvm.loop !7 - -_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* %A.sroa.0.0.i, i64 1, i32 0, i32 3, i32 0) #2 - ret void -} - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)* 
@_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, float, i64) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)*, %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 - -; Function Attrs: convergent -declare dso_local spir_func <2 x i32> @_Z39__spirv_JointMatrixGetElementCoordINTELIaLm8ELm32ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEDv2_jPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEm(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(4)*, i64) #2 - -attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/tmp/matrix-slice.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { convergent } - -!llvm.module.flags = !{!0, !1} -!opencl.spir.version = !{!2} -!spirv.Source = !{!3} -!llvm.ident = !{!4} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 7, !"frame-pointer", i32 2} -!2 = !{i32 1, i32 2} -!3 = !{i32 4, i32 100000} -!4 = !{!"clang version 14.0.0 (https://github.com/intel/llvm.git 3648adf79e4fdb619fdbe41d63bc39f456b5be8c)"} -!5 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!6 = !{} -!7 = distinct !{!7, !8} -!8 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll deleted file mode 100644 index 
a0f259934f..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll +++ /dev/null @@ -1,130 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc - -; RUN: llvm-spirv -s %t.bc -opaque-pointers=0 -o %t.regularized.bc -; RUN: llvm-dis -opaque-pointers=0 %t.regularized.bc -o %t.regularized.ll -; RUN: FileCheck < %t.regularized.ll %s --check-prefix=CHECK-REGULARIZED - -; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv -to-text %t.spv -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.rev.bc -; RUN: llvm-dis %t.rev.bc -o %t.rev.ll -; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM - -; CHECK-REGULARIZED: %[[#ExtractElementCall:]] = call spir_func half @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2{{.*}}, i64{{.*}}) -; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(4)*{{.*}}, i32 0, i32 0 -; CHECK-REGULARIZED: store half %[[#ExtractElementCall]], half addrspace(4)* %[[#GEP]] -; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half"*{{.*}}, i32 0, i32 0 -; CHECK-REGULARIZED: %[[#Component:]] = load half, half*{{.*}}, align 2 -; CHECK-REGULARIZED: call spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*{{.*}}, half %[[#Component]], i64{{.*}}) -; 
CHECK-REGULARIZED: declare dso_local spir_func half @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2, i64) -; CHECK-REGULARIZED: declare dso_local spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, half, i64) - -; CHECK-SPIRV: Name [[#VIDValueId:]] "agg.tmp.ascast.ascast" -; CHECK-SPIRV: TypeFloat [[#Float16Id:]] 16 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#JointMatrixTyId:]] [[#]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: VectorExtractDynamic [[#Float16Id]] [[#VEDId:]] [[#]] [[#]] -; CHECK-SPIRV: Store [[#]] [[#VEDId]] -; CHECK-SPIRV: PtrAccessChain [[#]] [[#GEPId:]] [[#VIDValueId]] [[#]] [[#]] -; CHECK-SPIRV: Load [[#Float16Id]] [[#ComponentId:]] [[#GEPId]] -; CHECK-SPIRV: VectorInsertDynamic [[#JointMatrixTyId]] [[#]] [[#]] [[#ComponentId]] [[#]] - -; CHECK-LLVM: %[[#ExtractElementCall:]] = call spir_func half @_Z28__spirv_VectorExtractDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3l(ptr addrspace(1){{.*}}, i64{{.*}}) -; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", ptr addrspace(4) {{.*}}, i32 0, i32 0 -; CHECK-LLVM: store half %[[#ExtractElementCall]], ptr addrspace(4) %[[#GEP]] - -; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", ptr{{.*}}, i32 0, i32 0 -; CHECK-LLVM: %[[#Component:]] = load half, ptr %[[#GEP]] -; CHECK-LLVM: spir_func ptr addrspace(1) @_Z27__spirv_VectorInsertDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3Dhl(ptr addrspace(1){{.*}}, half %[[#Component]], i64{{.*}}) - -; ModuleID = 
'element_wise_all_ops_half.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.cl::sycl::detail::half_impl::half" = type { half } -%"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" = type { %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* } -%spirv.JointMatrixINTEL._half_8_16_0_3 = type opaque -%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" = type { %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, i64 } - -$_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_ = comdat any - -$_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_ = comdat any - -; Function Attrs: convergent mustprogress norecurse -define linkonce_odr dso_local spir_func void @_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* noalias sret(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.result, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable(16) %lhs, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat { -entry: - %lhs.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8 - %ref.tmp1 = alloca %"class.cl::sycl::detail::half_impl::half", align 2 - %lhs.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %lhs.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* - %ref.tmp1.ascast = 
addrspacecast %"class.cl::sycl::detail::half_impl::half"* %ref.tmp1 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %0 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8 - %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %0, i32 0, i32 0 - %1 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15 - %spvm = getelementptr inbounds %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %1, i32 0, i32 0 - %2 = load %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13 - %3 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8 - %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %3, i32 0, i32 1 - %4 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17 - call spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2 %ref.tmp1.ascast, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* %2, i64 %4) #2 - ret 
void -} - -; Function Attrs: convergent mustprogress norecurse -define linkonce_odr dso_local spir_func align 8 dereferenceable(16) %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* @_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_(%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable_or_null(16) %this, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8 - %agg.tmp = alloca %"class.cl::sycl::detail::half_impl::half", align 2 - %this.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %this.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* - %agg.tmp.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half"* %agg.tmp to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %this1 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8 - %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 0 - %0 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15 - %spvm = getelementptr inbounds %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %0, i32 0, i32 0 - %1 = load 
%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13 - %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 1 - %2 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17 - %agg.tmp.ascast.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(4)* %agg.tmp.ascast to %"class.cl::sycl::detail::half_impl::half"* - %call = call spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* %1, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.tmp.ascast.ascast, i64 %2) #2 - ret %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1 -} - -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2, i64) #1 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2, %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, i64) 
#1 - -attributes #0 = { convergent mustprogress norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { convergent } - -!opencl.spir.version = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} -!spirv.Source = !{!1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1} -!opencl.used.extensions = !{!2, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} -!opencl.used.optional.core.features = !{!4, !3, !3, !4, !3, !4, !3, !3, !3, !4, !3, !4} -!opencl.compiler.options = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3} -!llvm.ident = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5} -!llvm.module.flags = !{!6, !7} -!sycl.specialization-constants = !{} -!sycl.specialization-constants-default-values = !{} - -!0 = !{i32 1, i32 2} -!1 = !{i32 4, i32 100000} -!2 = !{!"cl_khr_fp16"} -!3 = !{} -!4 = !{!"cl_doubles"} -!5 = !{!"Compiler"} -!6 = !{i32 1, !"wchar_size", i32 4} -!7 = !{i32 7, !"frame-pointer", i32 2} -!8 = !{!9, !9, i64 0} -!9 = !{!"any pointer", !10, i64 0} -!10 = !{!"omnipotent char", !11, i64 0} -!11 = !{!"Simple C++ TBAA"} -!12 = !{!"long", !10, i64 0} -!13 = !{!14, !9, i64 0} -!14 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix12joint_matrixINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0} -!15 = !{!16, !9, i64 0} -!16 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0, !12, i64 8} -!17 = !{!16, !12, i64 8} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll index 8ea9b3041d..6b5c380de5 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_half.ll @@ 
-1,166 +1,210 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; compiled from joint_matrix_half.cpp test from intel/llvm + +; RUN: llvm-as < %s -o %t.bc + +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_bfloat16_conversion,+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: TypeFloat [[#FloatTy:]] 32 -; CHECK-SPIRV-DAG: TypeFloat [[#HalfTy:]] 16 -; CHECK-SPIRV-DAG: TypeInt [[#IntTy:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Zero:]] 0 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Two:]] 2 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Three:]] 3 -; CHECK-SPIRV-DAG: Constant [[#IntTy]] [[#Sixteen:]] 16 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#CTy:]] [[#FloatTy]] [[#Two]] [[#Two]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#ATy:]] [[#HalfTy]] [[#Two]] [[#Sixteen]] [[#Zero]] [[#Three]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#BTy:]] [[#HalfTy]] [[#Sixteen]] [[#Two]] [[#Three]] [[#Three]] - -; CHECK-LLVM: %spirv.JointMatrixINTEL._float_2_2_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._half_2_16_0_3 -; CHECK-LLVM: %spirv.JointMatrixINTEL._half_16_2_3_3 - -; ModuleID = 'joint_matrix_test-sycl-spir64-unknown-unknown.bc' -source_filename = "joint_matrix_test.cpp" +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: TypeInt [[#INT:]] 32 +; CHECK-SPIRV-DAG: TypeFloat [[#Half:]] 16 +; CHECK-SPIRV-DAG: TypeFloat [[#Float:]] 32 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST8:]] 8 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST16:]] 16 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST3:]] 3 +; CHECK-SPIRV-DAG: Constant 
[[#INT]] [[#CONST2:]] 2 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST1:]] 1 +; CHECK-SPIRV-DAG: Constant [[#INT]] [[#CONST0:]] 0 +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Float]] [[#CONST8]] [[#CONST16]] [[#CONST3]] [[#CONST3]] [[#CONST2]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#Half]] [[#CONST8]] [[#CONST16]] [[#CONST0]] [[#CONST3]] [[#CONST0]] +; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Half]] [[#CONST16]] [[#CONST16]] [[#CONST2]] [[#CONST3]] [[#CONST1]] + +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2PU3AS1fliii(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) @"_Z79__spirv_JointMatrixLoadINTEL_RPU3AS141__spirv_JointMatrixINTEL__half_8_16_0_3_0PU3AS140class.sycl::_V1::detail::half_impl::halfliii"(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) @"_Z80__spirv_JointMatrixLoadINTEL_RPU3AS142__spirv_JointMatrixINTEL__half_16_16_2_3_1PU3AS140class.sycl::_V1::detail::half_impl::halfliii"(ptr addrspace(1) %{{.*}}, i64 %{{.*}}, i32 2, i32 3, i32 0) +; CHECK-LLVM: call spir_func target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELPU3AS141__spirv_JointMatrixINTEL__half_8_16_0_3_0PU3AS142__spirv_JointMatrixINTEL__half_16_16_2_3_1PU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2i(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) %{{.*}}, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %{{.*}}, i32 3) +; CHECK-LLVM: call spir_func void @_Z29__spirv_JointMatrixStoreINTELPU3AS1fPU3AS142__spirv_JointMatrixINTEL__float_8_16_3_3_2liii(ptr addrspace(1) %{{.*}}, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 
3, 2) %{{.*}}, i64 %{{.*}}, i32 0, i32 3, i32 0) + +; ModuleID = 'half.bc' +source_filename = "../joint_matrix_half.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" -%"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 } -%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] } -%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" } -%"class.cl::sycl::detail::half_impl::half" = type { half } -%spirv.JointMatrixINTEL._float_2_2_0_3 = type opaque -%spirv.JointMatrixINTEL._half_2_16_0_3 = type opaque -%spirv.JointMatrixINTEL._half_16_2_3_3 = type opaque - -$_ZTSN2cl4sycl6detail16AssertInfoCopierE = comdat any +%"class.sycl::_V1::detail::half_impl::half" = type { half } -$_ZTSZ4mainE11matrix_test = comdat any +$_ZTSZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_E7imatrix = comdat any @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail16AssertInfoCopierE(%"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, 
%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* byval(%"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 { +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_E7imatrix(ptr addrspace(1) noundef align 2 %_arg_accA, ptr addrspace(1) noundef align 2 %_arg_accB, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat !srcloc !49 !kernel_arg_buffer_location !50 !kernel_arg_runtime_aligned !51 !kernel_arg_exclusive_ptr !51 !intel_reqd_sub_group_size !52 !sycl_used_aspects !53 !sycl_fixed_targets !54 !sycl_kernel_omit_args !55 { entry: - %0 = getelementptr inbounds %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id", %"class._ZTSN2cl4sycl2idILi1EEE.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0 - %1 = addrspacecast i64* %0 to i64 addrspace(4)* - %2 = load i64, i64 addrspace(4)* %1, align 8 - %add.ptr.i = getelementptr inbounds %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened", %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, i64 %2 - %3 = bitcast %"struct._ZTSN2cl4sycl6detail14AssertHappenedE.cl::sycl::detail::AssertHappened" addrspace(1)* %add.ptr.i to i8 addrspace(1)* - %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)* - tail call spir_func void @__devicelib_assert_read(i8 addrspace(4)* %4) #3 - ret void -} - -; Function Attrs: convergent -declare extern_weak dso_local spir_func void @__devicelib_assert_read(i8 addrspace(4)*) local_unnamed_addr #1 - -; Function Attrs: convergent norecurse -define 
weak_odr dso_local spir_kernel void @_ZTSZ4mainE11matrix_test(float addrspace(1)* %_arg_, i64 %_arg_1, %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_3, %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_5) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 !intel_reqd_sub_group_size !7 { -entry: - %0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !8 - %1 = extractelement <3 x i64> %0, i64 1 - %2 = extractelement <3 x i64> %0, i64 0 - %3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15 - %4 = extractelement <3 x i64> %3, i64 1 - %5 = extractelement <3 x i64> %3, i64 0 - %cmp.i.i = icmp ult i64 %1, 2147483648 + call void @__itt_offload_wi_start_wrapper() + %0 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, i64 8), align 8, !noalias !56 + %1 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !56 + %2 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !63 + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !63 + %cmp.i.i = icmp ult i64 %0, 2147483648 tail call void @llvm.assume(i1 %cmp.i.i) - %cmp.i45.i = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i45.i) - %cmp.i43.i = icmp ult i64 %4, 2147483648 - tail call void @llvm.assume(i1 %cmp.i43.i) - %sub.i = sub nsw i64 %1, %4 - %cmp.i41.i = icmp ult i64 %5, 2147483648 - tail call void @llvm.assume(i1 %cmp.i41.i) - %sub5.i = sub nsw i64 %2, %5 - %mul6.i = shl nsw i64 %sub.i, 6 - %add.ptr.i51 = getelementptr inbounds float, float addrspace(1)* %_arg_, i64 %mul6.i - %add.ptr7.i52 = getelementptr inbounds float, float 
addrspace(1)* %add.ptr.i51, i64 %sub5.i - %add.ptr7.i = addrspacecast float addrspace(1)* %add.ptr7.i52 to float addrspace(4)* - %call8.i = tail call spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)* %add.ptr7.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %add.ptr11.i53 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_3, i64 %mul6.i - %add.ptr16.i55 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %_arg_5, i64 %sub5.i + %cmp.i61.i = icmp ult i64 %1, 2147483648 + tail call void @llvm.assume(i1 %cmp.i61.i) + %cmp.i63.i = icmp ult i64 %2, 2147483648 + tail call void @llvm.assume(i1 %cmp.i63.i) + %sub.i = sub nsw i64 %0, %2 + %cmp.i66.i = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %cmp.i66.i) + %sub5.i = sub nsw i64 %1, %3 + %mul.i = shl nsw i64 %sub.i, 3 + %mul8.i = mul i64 %mul.i, %_arg_N + %add.ptr.i.i = getelementptr inbounds float, ptr addrspace(1) %_arg_accC, i64 %mul8.i + %div58.i = and i64 %sub5.i, -16 + %add.ptr.i80.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i.i, i64 %div58.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef %add.ptr.i80.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %mul34.i = shl nsw i64 %div58.i, 1 + %div1159.i = lshr i64 %_arg_K, 4 + %mul18.i = mul i64 %mul.i, %_arg_K + %add.ptr.i95.i = getelementptr inbounds %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %_arg_accA, 
i64 %mul18.i + %mul30.i = shl i64 %_arg_N, 1 + %invariant.gep = getelementptr %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %_arg_accB, i64 %mul34.i br label %for.cond.i for.cond.i: ; preds = %for.body.i, %entry + %sub_c.sroa.0.0.i = phi target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) [ %call1.i.i, %entry ], [ %call.i.i, %for.body.i ] %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %C.0.i = phi %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* [ %call8.i, %entry ], [ %call19.i, %for.body.i ] - %cmp.i = icmp ult i32 %k.0.i, 32 - br i1 %cmp.i, label %for.body.i, label %_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit + %conv.i = zext i32 %k.0.i to i64 + %cmp.i = icmp ugt i64 %div1159.i, %conv.i + br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit for.body.i: ; preds = %for.cond.i - %idx.ext46.i = zext i32 %k.0.i to i64 - %add.ptr12.i54 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr11.i53, i64 %idx.ext46.i - %add.ptr12.i = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr12.i54 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %call13.i = tail call spir_func %spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm2ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* %add.ptr12.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %mul14.i = shl nuw nsw i32 %k.0.i, 5 - %idx.ext1547.i = zext i32 %mul14.i to i64 - %add.ptr17.i56 = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", 
%"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr16.i55, i64 %idx.ext1547.i - %add.ptr17.i = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(1)* %add.ptr17.i56 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)* - %call18.i = tail call spir_func %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm16ELm2ELN5__spv12MatrixLayoutE3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* %add.ptr17.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 - %call19.i = tail call spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN2cl4sycl6detail9half_impl4halfEfLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS6_3ELS6_0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS9_IT_XT1_EXT2_EXT4_EXT7_EEEPNS9_ISD_XT2_EXT3_EXT5_EXT7_EEESC_S8_(%spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* %call13.i, %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* %call18.i, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* %C.0.i, i32 3) #3 - %add.i = add nuw nsw i32 %k.0.i, 16 - br label %for.cond.i, !llvm.loop !20 - -_ZZ4mainENKUlN2cl4sycl7nd_itemILi2EEEE_clES2_.exit: ; preds = %for.cond.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)* %add.ptr7.i, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* %C.0.i, i64 %_arg_1, i32 0, i32 3, i32 0) #3 + %mul19.i = shl nsw i32 %k.0.i, 4 + %conv20.i = zext i32 %mul19.i to i64 + %add.ptr.i96.i = getelementptr inbounds %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %add.ptr.i95.i, i64 %conv20.i + %call1.i74.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) 
@_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i96.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + %div27.i = shl nsw i32 %k.0.i, 3 + %conv28.i = zext i32 %div27.i to i64 + %mul31.i = mul i64 %mul30.i, %conv28.i + %gep = getelementptr %"class.sycl::_V1::detail::half_impl::half", ptr addrspace(1) %invariant.gep, i64 %mul31.i + %call1.i78.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef %mul30.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #3 + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V16detail9half_impl4halfEfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) noundef %call1.i74.i, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) noundef %call1.i78.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3, !noalias !68 + %add.i = add nuw nsw i32 %k.0.i, 1 + br label %for.cond.i, !llvm.loop !71 + +_ZZZ15matrix_multiplyIfN4sycl3_V16detail9half_impl4halfELm16ELm32ELm16ELm64ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS5_IT0_XT1_EXT2_EERS5_IS9_XT3_EXT4_EEENKUlRNS1_7handlerEE_clESF_ENKUlNS1_7nd_itemILi2EEEE_clESI_.exit: ; preds = %for.cond.i + tail call 
spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i80.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %sub_c.sroa.0.0.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 + call void @__itt_offload_wi_finish_wrapper() ret void } -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #1 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z28__spirv_JointMatrixLoadINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEPNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS3_S5_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) 
@_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V16detail9half_impl4halfES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm2ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V16detail9half_impl4halfEfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", half, 8, 16, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", half, 16, 16, 2, 3, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIN2cl4sycl6detail9half_impl4halfELm16ELm2ELN5__spv12MatrixLayoutE3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPSA_mS6_S8_i(%"class.cl::sycl::detail::half_impl::half" addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +; Function Attrs: convergent nounwind +declare dso_local spir_func void 
@_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 -; Function Attrs: convergent -declare dso_local spir_func %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN2cl4sycl6detail9half_impl4halfEfLm2ELm16ELm2ELN5__spv12MatrixLayoutE0ELS6_3ELS6_0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT6_EXT7_EEEPNS9_IT_XT1_EXT2_EXT4_EXT7_EEEPNS9_ISD_XT2_EXT3_EXT5_EXT7_EEESC_S8_(%spirv.JointMatrixINTEL._half_2_16_0_3 addrspace(4)*, %spirv.JointMatrixINTEL._half_16_2_3_3 addrspace(4)*, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)*, i32) local_unnamed_addr #1 +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
-; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm2ELm2ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)*, %spirv.JointMatrixINTEL._float_2_2_0_3 addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1 +declare void @__itt_offload_wi_start_wrapper() -; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn -declare void @llvm.assume(i1 noundef) #2 +declare void @__itt_offload_wi_finish_wrapper() -attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/intel/build/joint_matrix_test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #2 = { inaccessiblememonly nofree nosync nounwind willreturn } -attributes #3 = { convergent } +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="../joint_matrix_half.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #2 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent nounwind } !llvm.module.flags = !{!0, !1} !opencl.spir.version = !{!2} !spirv.Source = !{!3} -!opencl.used.extensions = !{!4} -!opencl.used.optional.core.features = !{!4} -!opencl.compiler.options = !{!4} -!llvm.ident = !{!5} +!sycl_types_that_use_aspects = !{!4} +!sycl_aspects = !{!5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47} 
+!llvm.ident = !{!48} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, i32 2} !3 = !{i32 4, i32 100000} -!4 = !{} -!5 = !{!"Clang"} -!6 = !{i32 -1, i32 -1, i32 -1, i32 -1} -!7 = !{i32 16} -!8 = !{!9, !11, !13} -!9 = distinct !{!9, !10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!10 = distinct !{!10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!11 = distinct !{!11, !12, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!12 = distinct !{!12, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!13 = distinct !{!13, !14, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} -!14 = distinct !{!14, !"_ZN2cl4sycl6detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} -!15 = !{!16, !18, !13} -!16 = distinct !{!16, !17, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv: %agg.result"} -!17 = distinct !{!17, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN2cl4sycl2idILi2EEEE8initSizeEv"} -!18 = distinct !{!18, !19, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v: %agg.result"} -!19 = distinct !{!19, !"_ZN7__spirvL21initLocalInvocationIdILi2EN2cl4sycl2idILi2EEEEET0_v"} -!20 = distinct !{!20, !21, !22} -!21 = !{!"llvm.loop.mustprogress"} -!22 = !{!"llvm.loop.unroll.disable"} +!4 = !{!"class.sycl::_V1::detail::half_impl::half", i32 5} +!5 = !{!"cpu", i32 1} +!6 = !{!"gpu", i32 2} +!7 = !{!"accelerator", i32 3} +!8 = !{!"custom", i32 4} +!9 = !{!"fp16", i32 5} +!10 = !{!"fp64", i32 6} +!11 = !{!"image", i32 9} +!12 = !{!"online_compiler", i32 10} +!13 = !{!"online_linker", i32 11} +!14 = !{!"queue_profiling", i32 12} +!15 = !{!"usm_device_allocations", i32 13} +!16 = !{!"usm_host_allocations", i32 14} +!17 = !{!"usm_shared_allocations", i32 15} +!18 = !{!"usm_system_allocations", i32 17} +!19 = 
!{!"ext_intel_pci_address", i32 18} +!20 = !{!"ext_intel_gpu_eu_count", i32 19} +!21 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!22 = !{!"ext_intel_gpu_slices", i32 21} +!23 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!24 = !{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!25 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!26 = !{!"ext_intel_mem_channel", i32 25} +!27 = !{!"usm_atomic_host_allocations", i32 26} +!28 = !{!"usm_atomic_shared_allocations", i32 27} +!29 = !{!"atomic64", i32 28} +!30 = !{!"ext_intel_device_info_uuid", i32 29} +!31 = !{!"ext_oneapi_srgb", i32 30} +!32 = !{!"ext_oneapi_native_assert", i32 31} +!33 = !{!"host_debuggable", i32 32} +!34 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!35 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!36 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!37 = !{!"ext_intel_free_memory", i32 36} +!38 = !{!"ext_intel_device_id", i32 37} +!39 = !{!"ext_intel_memory_clock_rate", i32 38} +!40 = !{!"ext_intel_memory_bus_width", i32 39} +!41 = !{!"emulated", i32 40} +!42 = !{!"ext_intel_legacy_image", i32 41} +!43 = !{!"int64_base_atomics", i32 7} +!44 = !{!"int64_extended_atomics", i32 8} +!45 = !{!"usm_system_allocator", i32 17} +!46 = !{!"usm_restricted_shared_allocations", i32 16} +!47 = !{!"host", i32 0} +!48 = !{!"clang version 17.0.0 (https://github.com/intel/llvm.git 93f477358d74ae90024f758e7eeb97d4b13cea42)"} +!49 = !{i32 10643216} +!50 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1} +!51 = !{i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false} +!52 = !{i32 16} +!53 = !{i32 5} +!54 = !{} +!55 = !{i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false} +!56 = !{!57, !59, !61} +!57 = distinct !{!57, !58, 
!"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!58 = distinct !{!58, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!59 = distinct !{!59, !60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!60 = distinct !{!60, !"_ZN7__spirvL22initGlobalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!61 = distinct !{!61, !62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!62 = distinct !{!62, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!63 = !{!64, !66, !61} +!64 = distinct !{!64, !65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!65 = distinct !{!65, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!66 = distinct !{!66, !67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!67 = distinct !{!67, !"_ZN7__spirvL21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!68 = !{!69} +!69 = distinct !{!69, !70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_: %agg.result"} +!70 = distinct !{!70, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS0_6detail9half_impl4halfES9_fLm8ELm16ELm16ELNS4_6layoutE0ELSA_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELSA_3EEESC_RNSB_ISC_T0_LSE_0EXT3_EXT4_EXT6_EEERNSB_ISC_T1_LSE_1EXT4_EXT5_EXT7_EEERSF_"} +!71 = distinct !{!71, !72} +!72 = !{!"llvm.loop.mustprogress"} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll deleted file mode 100644 index 5dbb67fb8c..0000000000 --- 
a/test/extensions/INTEL/SPV_INTEL_joint_matrix/joint_matrix_tf32.ll +++ /dev/null @@ -1,204 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 < %s -o %t.bc - -; RUN: llvm-spirv %t.bc -opaque-pointers=0 --spirv-ext=+SPV_INTEL_tensor_float32_conversion,+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: Capability TensorFloat32RoundingINTEL -; CHECK-SPIRV-DAG: Capability JointMatrixINTEL -; CHECK-SPIRV-DAG: Capability JointMatrixTF32ComponentTypeINTEL -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_tensor_float32_conversion" -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV-DAG: TypeInt [[#TypeInt:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#TypeInt]] [[#CTI:]] 1 {{$}} -; CHECK-SPIRV-DAG: TypeFloat [[#FloatTy:]] 32 -; CHECK-SPIRV: TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] [[#CTI]] -; CHECK-SPIRV: TypeJointMatrixINTEL [[#]] [[#FloatTy]] [[#]] [[#]] [[#]] [[#]] [[#]] [[#CTI]] - -; CHECK-LLVM: %spirv.JointMatrixINTEL._float_8_16_3_3_2 = type opaque -; CHECK-LLVM: %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 = type opaque -; CHECK-LLVM: %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 = type opaque - -; ModuleID = 'matrix-tf32-test-sycl-spir64-unknown-unknown.bc' -source_filename = "matrix-tf32-test.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } -%"class.sycl::_V1::detail::array" = type { [2 x i64] } -%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } -%spirv.JointMatrixINTEL._float_8_16_3_3_2 = type opaque 
-%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 = type opaque -%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 = type opaque - -$_ZTSZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any - -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(float addrspace(1)* noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K, float addrspace(1)* noundef align 4 %_arg_accA, float addrspace(1)* noundef align 4 %_arg_accB, %"class.sycl::_V1::range"* noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB8, %"class.sycl::_V1::id"* noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB9) local_unnamed_addr #0 { -entry: - %agg.tmp19.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", %"class.sycl::_V1::range"* %_arg_accB8, i64 0, i32 0, i32 0, i64 1 - %agg.tmp19.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp19.sroa.0.sroa.2.0..sroa_idx, align 8 - %0 = getelementptr inbounds %"class.sycl::_V1::id", %"class.sycl::_V1::id"* %_arg_accB9, i64 0, i32 0, i32 0, i64 0 - %agg.tmp20.sroa.0.sroa.0.0.copyload = load i64, i64* %0, align 8 - %agg.tmp20.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::id", %"class.sycl::_V1::id"* %_arg_accB9, i64 0, i32 0, i32 0, i64 1 - %agg.tmp20.sroa.0.sroa.2.0.copyload = load i64, i64* %agg.tmp20.sroa.0.sroa.2.0..sroa_idx, align 8 - %mul.i4.i.i.i.i67 = mul i64 %agg.tmp20.sroa.0.sroa.0.0.copyload, %agg.tmp19.sroa.0.sroa.2.0.copyload - %add.i6.i.i.i.i68 = 
add i64 %mul.i4.i.i.i.i67, %agg.tmp20.sroa.0.sroa.2.0.copyload - %add.ptr.i69 = getelementptr inbounds float, float addrspace(1)* %_arg_accB, i64 %add.i6.i.i.i.i68 - %1 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId, align 32 - %2 = extractelement <3 x i64> %1, i64 1 - %3 = extractelement <3 x i64> %1, i64 0 - %4 = load <3 x i64>, <3 x i64> addrspace(1)* @__spirv_BuiltInLocalInvocationId, align 32 - %5 = extractelement <3 x i64> %4, i64 1 - %6 = extractelement <3 x i64> %4, i64 0 - %cmp.i.i = icmp ult i64 %2, 2147483648 - tail call void @llvm.assume(i1 %cmp.i.i) - %cmp.i136.i = icmp ult i64 %3, 2147483648 - tail call void @llvm.assume(i1 %cmp.i136.i) - %cmp.i138.i = icmp ult i64 %5, 2147483648 - tail call void @llvm.assume(i1 %cmp.i138.i) - %sub.i = sub nsw i64 %2, %5 - %cmp.i140.i = icmp ult i64 %6, 2147483648 - tail call void @llvm.assume(i1 %cmp.i140.i) - %sub5.i = sub nsw i64 %3, %6 - %mul.i = shl nsw i64 %sub.i, 3 - %mul8.i = mul i64 %mul.i, %_arg_N - %add.ptr.i.i = getelementptr inbounds float, float addrspace(1)* %_arg_accC, i64 %mul8.i - %div134.i = and i64 %sub5.i, -16 - %add.ptr.i182.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i.i, i64 %div134.i - %call.ascast.i.i = addrspacecast float addrspace(1)* %add.ptr.i182.i to float addrspace(4)* - %call1.i.i = tail call spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS2_S4_i(float addrspace(4)* noundef %call.ascast.i.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 - %mul17.i = mul i64 %mul.i, %_arg_K - %add.ptr.i194.i = getelementptr inbounds float, float addrspace(1)* %_arg_accA, i64 %mul17.i - %idx.neg.i.i205.i = sub i64 0, %add.i6.i.i.i.i68 - %add.ptr.i.i206334.i = getelementptr float, float addrspace(1)* %add.ptr.i69, i64 %div134.i - 
%add.ptr.i209333.i = getelementptr float, float addrspace(1)* %add.ptr.i.i206334.i, i64 %idx.neg.i.i205.i - br label %for.cond.i - -for.cond.i: ; preds = %for.cond.cleanup58.i, %entry - %sub_a.sroa.0.0.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ undef, %entry ], [ %sub_a.sroa.0.1.i, %for.cond.cleanup58.i ] - %sub_c.sroa.0.0.i = phi %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* [ %call1.i.i, %entry ], [ %call.i168.i, %for.cond.cleanup58.i ] - %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.cond.cleanup58.i ] - %conv.i = zext i32 %k.0.i to i64 - %cmp.i = icmp ult i64 %conv.i, %_arg_K - br i1 %cmp.i, label %for.body.i, label %for.cond82.i - -for.body.i: ; preds = %for.cond.i - %add.ptr.i197.i = getelementptr inbounds float, float addrspace(1)* %add.ptr.i194.i, i64 %conv.i - %call.ascast.i148.i = addrspacecast float addrspace(1)* %add.ptr.i197.i to float addrspace(4)* - %call1.i149.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef %call.ascast.i148.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 - %mul26.i = mul i64 %conv.i, %_arg_N - %add.ptr.i212.i = getelementptr float, float addrspace(1)* %add.ptr.i209333.i, i64 %mul26.i - %call.ascast.i155.i = addrspacecast float addrspace(1)* %add.ptr.i212.i to float addrspace(4)* - %call1.i156.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef %call.ascast.i155.i, i64 noundef 
%_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 - br label %for.cond30.i - -for.cond30.i: ; preds = %for.body37.i, %for.body.i - %sub_a.sroa.0.1.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ %call1.i149.i, %for.body.i ], [ %call.i225.i, %for.body37.i ] - %i.0.i = phi i32 [ 0, %for.body.i ], [ %inc.i, %for.body37.i ] - %conv31.i = zext i32 %i.0.i to i64 - %call.i215.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i) #3 - %cmp35.i = icmp ugt i64 %call.i215.i, %conv31.i - br i1 %cmp35.i, label %for.body37.i, label %for.cond52.i - -for.body37.i: ; preds = %for.cond30.i - %call.i218.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, i64 noundef %conv31.i) #3 - %call.i.i = tail call spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef %call.i218.i) #3 - %call.i225.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, float noundef %call.i.i, i64 noundef %conv31.i) #3 - %inc.i = add nuw nsw i32 %i.0.i, 1 - br label %for.cond30.i - -for.cond52.i: ; preds = %for.cond30.i, %for.body59.i - 
%sub_b.sroa.0.0.i = phi %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* [ %call.i243.i, %for.body59.i ], [ %call1.i156.i, %for.cond30.i ] - %i51.0.i = phi i32 [ %inc74.i, %for.body59.i ], [ 0, %for.cond30.i ] - %conv53.i = zext i32 %i51.0.i to i64 - %call.i229.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i) #3 - %cmp57.i = icmp ugt i64 %call.i229.i, %conv53.i - br i1 %cmp57.i, label %for.body59.i, label %for.cond.cleanup58.i - -for.cond.cleanup58.i: ; preds = %for.cond52.i - %call.i168.i = tail call spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS9_1ELS9_2ELNS8_12MatrixLayoutE0ELSA_0ELSA_3ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSD_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSD_ISH_XT2_EXT3_EXT8_EXT10_EXT5_EEESG_SC_(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.1.i, %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef %sub_c.sroa.0.0.i, i32 noundef 3) #3 - %add.i = add nuw nsw i32 %k.0.i, 16 - br label %for.cond.i - -for.body59.i: ; preds = %for.cond52.i - %call.i236.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, i64 noundef %conv53.i) #3 - 
%call.i171.i = tail call spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef %call.i236.i) #3 - %call.i243.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef %sub_b.sroa.0.0.i, float noundef %call.i171.i, i64 noundef %conv53.i) #3 - %inc74.i = add nuw nsw i32 %i51.0.i, 1 - br label %for.cond52.i - -for.cond82.i: ; preds = %for.cond.i, %for.body87.i - %sub_a.sroa.0.2.i = phi %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* [ %call5.i.i, %for.body87.i ], [ %sub_a.sroa.0.0.i, %for.cond.i ] - %i81.0.i = phi i32 [ %inc96.i, %for.body87.i ], [ 0, %for.cond.i ] - %conv83.i = zext i32 %i81.0.i to i64 - %call.i247.i = tail call spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i) #3 - %cmp85.i = icmp ugt i64 %call.i247.i, %conv83.i - br i1 %cmp85.i, label %for.body87.i, label %_ZZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit - -for.body87.i: ; preds = %for.cond82.i - %call.i269.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* 
noundef %sub_a.sroa.0.2.i, i64 noundef %conv83.i) #3 - %call.i276.i = tail call spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i, i64 noundef %conv83.i) #3 - %mul.i.i = fmul float %call.i276.i, 2.000000e+00 - %call5.i.i = tail call spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef %sub_a.sroa.0.2.i, float noundef %mul.i.i, i64 noundef %conv83.i) #3 - %inc96.i = add nuw nsw i32 %i81.0.i, 1 - br label %for.cond82.i - -_ZZZ15matrix_multiplyIffLm16ELm32ELm32ELm32ELm16ELm32EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond82.i - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS2_S4_i(float addrspace(4)* noundef %call.ascast.i.i, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef %sub_c.sroa.0.0.i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #3 - call void @__itt_offload_wi_finish_wrapper() - ret void -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) -declare void @llvm.assume(i1 noundef) #1 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* 
@_Z28__spirv_JointMatrixLoadINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS2_S4_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mSA_SC_i(float addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef float @_Z25__spirv_RoundFToTF32INTELf(float noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef float 
@_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm8ELm16ELN5__spv9MatrixUseE0ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, float noundef, i64 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEmPNS8_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEE(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef float @_Z28__spirv_VectorExtractDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EET_PNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEm(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, i64 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* 
@_Z27__spirv_VectorInsertDynamicIfN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32ELm16ELm16ELN5__spv9MatrixUseE1ELNS8_12MatrixLayoutE0ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEESG_T_m(%spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, float noundef, i64 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi12experimental6matrix9precision4tf32EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS9_1ELS9_2ELNS8_12MatrixLayoutE0ELSA_0ELSA_3ELNS8_5Scope4FlagE3EEPNS8_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSD_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSD_ISH_XT2_EXT3_EXT8_EXT10_EXT5_EEESG_SC_(%spirv.JointMatrixINTEL._tf32_8_16_0_3_0 addrspace(4)* noundef, %spirv.JointMatrixINTEL._tf32_16_16_0_3_1 addrspace(4)* noundef, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS2_S4_i(float addrspace(4)* noundef, %spirv.JointMatrixINTEL._float_8_16_3_3_2 addrspace(4)* noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -declare void @__itt_offload_wi_finish_wrapper() - -attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-tf32-test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } -attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #3 = { convergent } diff --git 
a/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll deleted file mode 100644 index 5b59c2d8f1..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/opaque_joint_matrix.ll +++ /dev/null @@ -1,151 +0,0 @@ -; RUN: llvm-as < %s -o %t.bc -; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o %t.spt -; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV - -; RUN: llvm-spirv -r %t.spv -o %t.rev.bc -opaque-pointers=0 -; RUN: llvm-dis -opaque-pointers=0 < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM - -; CHECK-SPIRV-DAG: Capability JointMatrixINTEL -; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" -; CHECK-SPIRV-DAG: TypeInt [[#Int8Ty:]] 8 0 -; CHECK-SPIRV-DAG: TypeInt [[#Int32Ty:]] 32 0 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const12:]] 12 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const0:]] 0 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const48:]] 48 -; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy1:]] [[#Int32Ty]] [[#Const12]] [[#Const12]] [[#Const3]] [[#Const3]] [[#Const2]] -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy2:]] [[#Int8Ty]] [[#Const12]] [[#Const48]] [[#Const0]] [[#Const3]] [[#Const0]] -; CHECK-SPIRV-DAG: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int8Ty]] [[#Const48]] [[#Const12]] [[#Const2]] [[#Const3]] [[#Const1]] - -; CHECK-LLVM-DAG: %spirv.JointMatrixINTEL._int_12_12_3_3_2 = type opaque -; CHECK-LLVM-DAG: %spirv.JointMatrixINTEL._char_12_48_0_3_0 = type opaque -; CHECK-LLVM-DAG: %spirv.JointMatrixINTEL._char_48_12_2_3_1 = type opaque - -; ModuleID = 'test-matrix-opaque.bc' -source_filename = "matrix-int8-test.cpp" -target datalayout = 
"e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } -%"class.sycl::_V1::detail::array" = type { [2 x i64] } -%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } - -$_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix = comdat any - -@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 -@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 - -; Function Attrs: convergent norecurse -define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_E7imatrix(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { -entry: - %sub_c.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), align 8 - %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 - %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 - %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 - %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %0, align 8 - %agg.tmp16.sroa.0.sroa.2.0..sroa_idx = getelementptr 
inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 1 - %agg.tmp16.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp16.sroa.0.sroa.2.0..sroa_idx, align 8 - %mul.i4.i.i.i.i45 = mul i64 %agg.tmp16.sroa.0.sroa.0.0.copyload, %agg.tmp15.sroa.0.sroa.2.0.copyload - %add.i6.i.i.i.i46 = add i64 %mul.i4.i.i.i.i45, %agg.tmp16.sroa.0.sroa.2.0.copyload - %add.ptr.i47 = getelementptr inbounds i8, ptr addrspace(1) %_arg_accB, i64 %add.i6.i.i.i.i46 - %1 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32 - %2 = extractelement <3 x i64> %1, i64 1 - %3 = extractelement <3 x i64> %1, i64 0 - %4 = load <3 x i64>, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32 - %5 = extractelement <3 x i64> %4, i64 1 - %6 = extractelement <3 x i64> %4, i64 0 - %cmp.i.i = icmp ult i64 %2, 2147483648 - %cmp.i54.i = icmp ult i64 %3, 2147483648 - %cmp.i56.i = icmp ult i64 %5, 2147483648 - %sub.i = sub nsw i64 %2, %5 - %cmp.i58.i = icmp ult i64 %6, 2147483648 - %sub5.i = sub nsw i64 %3, %6 - %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef 0) #4 - store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 - %mul.i = mul nsw i64 %sub.i, 12 - %div2452.i = lshr i64 %sub5.i, 4 - %mul26.i = mul i64 %div2452.i, 48 - %div.i = udiv i64 %_arg_K, 48 - %mul11.i = mul i64 %mul.i, %_arg_K - %add.ptr.i93.i = getelementptr inbounds i8, ptr addrspace(1) %_arg_accA, i64 %mul11.i - %idx.neg.i.i104.i = sub i64 0, %add.i6.i.i.i.i46 - %add.ptr.i.i105141.i = getelementptr i8, ptr addrspace(1) %add.ptr.i47, i64 %mul26.i - %mul22.i = shl i64 
%_arg_N, 2 - %add.ptr.i108140.i = getelementptr i8, ptr addrspace(1) %add.ptr.i.i105141.i, i64 %idx.neg.i.i104.i - %ref.tmp29.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %ref.tmp29.sroa.0.i to ptr - %7 = bitcast ptr %ref.tmp29.sroa.0.i to ptr - %8 = bitcast ptr %sub_c.sroa.0.i to ptr - br label %for.cond.i - -for.cond.i: ; preds = %for.body.i, %entry - %k.0.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] - %conv.i = zext i32 %k.0.i to i64 - %cmp.i = icmp ugt i64 %div.i, %conv.i - br i1 %cmp.i, label %for.body.i, label %_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit - -for.body.i: ; preds = %for.cond.i - %mul12.i = mul nsw i32 %k.0.i, 48 - %conv13.i = zext i32 %mul12.i to i64 - %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i - %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 - %div20.i = mul nsw i32 %k.0.i, 12 - %conv21.i = zext i32 %div20.i to i64 - %mul23.i = mul i64 %mul22.i, %conv21.i - %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i - %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - %call1.i73.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) 
noundef %call.ascast.i72.i, i64 noundef %mul22.i, i32 noundef 2, i32 noundef 3, i32 noundef 0) #4 - call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef %call1.i.i, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef %call1.i73.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 3) #4 - store target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 - %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 - store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %add.i = add nuw nsw i32 %k.0.i, 1 - br label %for.cond.i - -_ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6_EERS0_IT0_XT1_EXT2_EERS0_IS4_XT3_EXT4_EEENKUlRN4sycl3_V17handlerEE_clESC_ENKUlNSA_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond.i - %mul37.i = mul i64 %mul.i, %_arg_N - %add.ptr.i.i = getelementptr inbounds i32, ptr addrspace(1) %_arg_accC, i64 %mul37.i - %mul39.i = mul nuw i64 %div2452.i, 12 - %add.ptr.i81.i = 
getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i - %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i64 noundef %_arg_N, i32 noundef 0, i32 noundef 3, i32 noundef 0) #4 - call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - ret void -} - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z26__spirv_CompositeConstructIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEES6_(i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) @_Z28__spirv_JointMatrixLoadINTELIaLm12ELm48ELN5__spv9MatrixUseE0ELNS0_12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) @_Z28__spirv_JointMatrixLoadINTELIaLm48ELm12ELN5__spv9MatrixUseE1ELNS0_12MatrixLayoutE2ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT3_EXT4_EXT2_EEEPS6_mS2_S4_i(ptr addrspace(4) noundef, i64 noundef, i32 noundef, 
i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIaiLm12ELm48ELm12ELN5__spv9MatrixUseE0ELS1_1ELS1_2ELNS0_12MatrixLayoutE0ELS2_2ELS2_3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNS5_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNS5_IS9_XT2_EXT3_EXT8_EXT10_EXT5_EEES8_S4_(target("spirv.JointMatrixINTEL", i8, 12, 48, 0, 3, 0) noundef, target("spirv.JointMatrixINTEL", i8, 48, 12, 2, 3, 1) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: convergent -declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIiLm12ELm12ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS5_XT0_EXT1_EXT3_EXT4_EXT2_EEEmS2_S4_i(ptr addrspace(4) noundef, target("spirv.JointMatrixINTEL", i32, 12, 12, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 - -attributes #0 = { convergent norecurse "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="matrix-int8-test.cpp" "uniform-work-group-size"="true" } -attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } -attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -attributes #4 = { convergent } diff --git 
a/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll deleted file mode 100644 index 2a813950fd..0000000000 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/sycl_2020_namespace.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llvm-as -opaque-pointers=0 %s -o %t.bc -; RUN: llvm-spirv %t.bc -opaque-pointers=0 -spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv -; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -; Ensure that ::sycl::_V1.*{half|bfloat16} are parsed as SYCL types. - -; CHECK-DAG: TypeFloat [[#HalfTy:]] 16 -; CHECK-DAG: TypeInt [[#BFloat16Ty:]] 16 - -%"class.sycl::_V1::anything::half" = type { half } -%"class.sycl::_V1::anything::bfloat16" = type { i16 } - -%"struct.__spv::__spirv_JointMatrixINTEL.half" = type { [2 x [2 x [1 x [4 x %"class.sycl::_V1::anything::half"]]]]* } -%"struct.__spv::__spirv_JointMatrixINTEL.bfloat16" = type { [2 x [2 x [1 x [4 x %"class.sycl::_V1::anything::bfloat16"]]]]* } - -define spir_func void @foo(%"struct.__spv::__spirv_JointMatrixINTEL.half" *) { - ret void -} - -define spir_func void @bar(%"struct.__spv::__spirv_JointMatrixINTEL.bfloat16" *) { - ret void -} From e781a911fc4a4990a7737ede355bf3909fac4976 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Tue, 4 Apr 2023 16:26:15 +0200 Subject: [PATCH 07/12] [Backport to 16] Start preparing for TypeJointMatrixINTEL switch (#1935) The patch adds TypeJointMatrixINTELv2 which maps to new type OpCode 6184. Under new OpCode matrix type no longer has Layout parameter. The patch also moved 'scope' to optional matrix muladd instruction. The changes are done only in the consumer part to prepare the switch and make E2E switch backward compatible by preparing consumers ahead of time. 
Unfortunately there is no way to add a test for this unless it's a binary test, but it seems to be a bit unsafe to add this, so the patch was tested locally. Spec change: intel/llvm#8175 Signed-off-by: Sidorov, Dmitry (cherry picked from commit a6fcade17ab06f4ec674c4b5258a1bf4e275436c) --- lib/SPIRV/OCLUtil.cpp | 1 + lib/SPIRV/SPIRVReader.cpp | 9 ++--- lib/SPIRV/libSPIRV/SPIRVEntry.cpp | 4 +++ lib/SPIRV/libSPIRV/SPIRVInstruction.h | 9 ++--- lib/SPIRV/libSPIRV/SPIRVOpCode.h | 1 + lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h | 1 + lib/SPIRV/libSPIRV/SPIRVType.cpp | 16 ++++++--- lib/SPIRV/libSPIRV/SPIRVType.h | 35 ++++++++++++++++---- lib/SPIRV/libSPIRV/spirv_internal.hpp | 2 ++ 9 files changed, 60 insertions(+), 18 deletions(-) diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp index aadfb0ceae..a403975eb8 100644 --- a/lib/SPIRV/OCLUtil.cpp +++ b/lib/SPIRV/OCLUtil.cpp @@ -898,6 +898,7 @@ SPIRAddressSpace getOCLOpaqueTypeAddrSpace(Op OpCode) { case OpTypeSampler: return SPIRV_SAMPLER_T_ADDR_SPACE; case internal::OpTypeJointMatrixINTEL: + case internal::OpTypeJointMatrixINTELv2: case OpTypeCooperativeMatrixKHR: return SPIRAS_Global; default: diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp index 4ee8151495..0a64d9647c 100644 --- a/lib/SPIRV/SPIRVReader.cpp +++ b/lib/SPIRV/SPIRVReader.cpp @@ -472,10 +472,11 @@ Type *SPIRVToLLVM::transType(SPIRVType *T, bool UseTPT) { auto *MT = static_cast(T); auto R = static_cast(MT->getRows())->getZExtIntValue(); auto C = static_cast(MT->getColumns())->getZExtIntValue(); - auto L = static_cast(MT->getLayout())->getZExtIntValue(); - auto S = static_cast(MT->getScope())->getZExtIntValue(); - SmallVector Params = {(unsigned)R, (unsigned)C, (unsigned)L, - (unsigned)S}; + std::vector Params = {(unsigned)R, (unsigned)C}; + if (auto *Layout = MT->getLayout()) + Params.push_back(static_cast(Layout)->getZExtIntValue()); + Params.push_back( + static_cast(MT->getScope())->getZExtIntValue()); if (auto *Use = MT->getUse())
Params.push_back(static_cast(Use)->getZExtIntValue()); auto *CTI = MT->getComponentTypeInterpretation(); diff --git a/lib/SPIRV/libSPIRV/SPIRVEntry.cpp b/lib/SPIRV/libSPIRV/SPIRVEntry.cpp index 16a918b18e..b860eb9d6e 100644 --- a/lib/SPIRV/libSPIRV/SPIRVEntry.cpp +++ b/lib/SPIRV/libSPIRV/SPIRVEntry.cpp @@ -84,6 +84,10 @@ SPIRVEntry *SPIRVEntry::create(Op OpCode) { static const OpToFactoryMapTy OpToFactoryMap(std::begin(Table), std::end(Table)); + // TODO: To remove this when we make a switch to new version + if (OpCode == internal::OpTypeJointMatrixINTELv2) + OpCode = internal::OpTypeJointMatrixINTEL; + OpToFactoryMapTy::const_iterator Loc = OpToFactoryMap.find(OpCode); if (Loc != OpToFactoryMap.end()) return Loc->second(); diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 2fa007ae96..5c3d0daadf 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -1991,6 +1991,7 @@ class SPIRVCompositeConstruct : public SPIRVInstruction { case OpTypeArray: case OpTypeStruct: case internal::OpTypeJointMatrixINTEL: + case internal::OpTypeJointMatrixINTELv2: case OpTypeCooperativeMatrixKHR: break; default: @@ -3516,10 +3517,10 @@ class SPIRVJointMatrixINTELInst : public SPIRVJointMatrixINTELInstBase { SPIRV##x##INTEL; _SPIRV_OP(JointMatrixLoad, true, 6, true) _SPIRV_OP(JointMatrixStore, false, 5, true) -_SPIRV_OP(JointMatrixMad, true, 7) -_SPIRV_OP(JointMatrixSUMad, true, 7) -_SPIRV_OP(JointMatrixUSMad, true, 7) -_SPIRV_OP(JointMatrixUUMad, true, 7) +_SPIRV_OP(JointMatrixMad, true, 6, true) +_SPIRV_OP(JointMatrixSUMad, true, 6, true) +_SPIRV_OP(JointMatrixUSMad, true, 6, true) +_SPIRV_OP(JointMatrixUUMad, true, 6, true) // TODO: move to SPIRVJointMatrixINTELWorkItemInst _SPIRV_OP(JointMatrixWorkItemLength, true, 4) #undef _SPIRV_OP diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCode.h b/lib/SPIRV/libSPIRV/SPIRVOpCode.h index 7af20dd43c..cfa480faf0 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCode.h +++ 
b/lib/SPIRV/libSPIRV/SPIRVOpCode.h @@ -230,6 +230,7 @@ inline bool isTypeOpCode(Op OpCode) { isSubgroupAvcINTELTypeOpCode(OpCode) || OC == OpTypeVmeImageINTEL || isVCOpCode(OpCode) || OC == internal::OpTypeTokenINTEL || OC == internal::OpTypeJointMatrixINTEL || + OC == internal::OpTypeJointMatrixINTELv2 || OC == OpTypeCooperativeMatrixKHR; } diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 30db62d097..a84ee56507 100644 --- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -6,6 +6,7 @@ _SPIRV_OP_INTERNAL(ArithmeticFenceINTEL, internal::OpArithmeticFenceINTEL) _SPIRV_OP_INTERNAL(ConvertFToBF16INTEL, internal::OpConvertFToBF16INTEL) _SPIRV_OP_INTERNAL(ConvertBF16ToFINTEL, internal::OpConvertBF16ToFINTEL) _SPIRV_OP_INTERNAL(TypeJointMatrixINTEL, internal::OpTypeJointMatrixINTEL) +_SPIRV_OP_INTERNAL(TypeJointMatrixINTEL, internal::OpTypeJointMatrixINTEL) _SPIRV_OP_INTERNAL(JointMatrixLoadINTEL, internal::OpJointMatrixLoadINTEL) _SPIRV_OP_INTERNAL(JointMatrixStoreINTEL, internal::OpJointMatrixStoreINTEL) _SPIRV_OP_INTERNAL(JointMatrixMadINTEL, internal::OpJointMatrixMadINTEL) diff --git a/lib/SPIRV/libSPIRV/SPIRVType.cpp b/lib/SPIRV/libSPIRV/SPIRVType.cpp index 9f7aac0b2c..f7ba79d3ec 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -206,7 +206,8 @@ bool SPIRVType::isTypeStruct() const { return OpCode == OpTypeStruct; } bool SPIRVType::isTypeVector() const { return OpCode == OpTypeVector; } bool SPIRVType::isTypeJointMatrixINTEL() const { - return OpCode == internal::OpTypeJointMatrixINTEL; + return OpCode == internal::OpTypeJointMatrixINTEL || + OpCode == internal::OpTypeJointMatrixINTELv2; } bool SPIRVType::isTypeCooperativeMatrixKHR() const { @@ -290,13 +291,20 @@ void SPIRVTypeForwardPointer::decode(std::istream &I) { } SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL( - SPIRVModule *M, SPIRVId TheId, SPIRVType 
*CompType, + SPIRVModule *M, SPIRVId TheId, Op OC, SPIRVType *CompType, std::vector Args) : SPIRVType(M, FixedWC + Args.size(), OC, TheId), CompType(CompType), - Args(Args) {} + Args(std::move(Args)) {} + +SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL( + SPIRVModule *M, SPIRVId TheId, SPIRVType *CompType, + std::vector Args) + : SPIRVType(M, FixedWC + Args.size(), internal::OpTypeJointMatrixINTEL, + TheId), + CompType(CompType), Args(std::move(Args)) {} SPIRVTypeJointMatrixINTEL::SPIRVTypeJointMatrixINTEL() - : SPIRVType(OC), CompType(nullptr), + : SPIRVType(internal::OpTypeJointMatrixINTEL), CompType(nullptr), Args({nullptr, nullptr, nullptr, nullptr}) {} void SPIRVTypeJointMatrixINTEL::encode(spv_ostream &O) const { diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index c82bcf5d67..834ee7f933 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -1062,13 +1062,18 @@ class SPIRVTypeTokenINTEL : public SPIRVType { }; class SPIRVTypeJointMatrixINTEL : public SPIRVType { + Op OC; SPIRVType *CompType; std::vector Args; public: - const static Op OC = internal::OpTypeJointMatrixINTEL; const static SPIRVWord FixedWC = 3; - // Complete constructor + // Complete constructor with non-default OC + SPIRVTypeJointMatrixINTEL(SPIRVModule *M, SPIRVId TheId, Op OC, + SPIRVType *CompType, + std::vector Args); + + // Incomplete constructor for default OC SPIRVTypeJointMatrixINTEL(SPIRVModule *M, SPIRVId TheId, SPIRVType *CompType, std::vector Args); // Incomplete constructor @@ -1087,11 +1092,29 @@ class SPIRVTypeJointMatrixINTEL : public SPIRVType { SPIRVType *getCompType() const { return CompType; } SPIRVValue *getRows() const { return Args[0]; } SPIRVValue *getColumns() const { return Args[1]; } - SPIRVValue *getLayout() const { return Args[2]; } - SPIRVValue *getScope() const { return Args[3]; } - SPIRVValue *getUse() const { return Args.size() > 4 ? 
Args[4] : nullptr; } + + SPIRVValue *getLayout() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args[2]; + return nullptr; + } + + SPIRVValue *getScope() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args[3]; + return Args[2]; + } + + SPIRVValue *getUse() const { + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args.size() > 4 ? Args[4] : nullptr; + return Args[3]; + } + SPIRVValue *getComponentTypeInterpretation() const { - return Args.size() > 5 ? Args[5] : nullptr; + if (this->getOpCode() == internal::OpTypeJointMatrixINTEL) + return Args.size() > 5 ? Args[5] : nullptr; + return Args.size() > 4 ? Args[4] : nullptr; } }; diff --git a/lib/SPIRV/libSPIRV/spirv_internal.hpp b/lib/SPIRV/libSPIRV/spirv_internal.hpp index 3330ae1395..07d75a2db1 100644 --- a/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -68,6 +68,7 @@ enum InternalOp { IOpJointMatrixSUMadINTEL = 6128, IOpJointMatrixUSMadINTEL = 6129, IOpJointMatrixUUMadINTEL = 6130, + IOpTypeJointMatrixINTELv2 = 6184, IOpArithmeticFenceINTEL = 6145, IOpCooperativeMatrixLoadCheckedINTEL = 6193, IOpCooperativeMatrixStoreCheckedINTEL = 6194, @@ -181,6 +182,7 @@ _SPIRV_OP(Capability, JointMatrixBF16ComponentTypeINTEL) _SPIRV_OP(Capability, JointMatrixPackedInt2ComponentTypeINTEL) _SPIRV_OP(Capability, JointMatrixPackedInt4ComponentTypeINTEL) _SPIRV_OP(Op, TypeJointMatrixINTEL) +_SPIRV_OP(Op, TypeJointMatrixINTELv2) _SPIRV_OP(Op, JointMatrixLoadINTEL) _SPIRV_OP(Op, JointMatrixStoreINTEL) _SPIRV_OP(Op, JointMatrixMadINTEL) From 16c0d72cc3feb28a71d1694df32ea6d311141b3d Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Fri, 1 Sep 2023 17:25:46 +0300 Subject: [PATCH 08/12] Fix TopologicalSort for Joint and Cooperative matrices (#2143) visit method of the sort relies on getNonLiteralOperands method of the SPIRVType which is being inserted in the module. 
Without it dependent types can be inserted in the module in incorrect order. For example: TypeCooperativeMatrixKHR %ID% TypeStruct ... %ID% is the correct order, but without the patch in some cases the translator could generate the opposite order. Signed-off-by: Sidorov, Dmitry (cherry picked from commit 436c4972ad94810783ae1529800c64d660be7c6a) --- lib/SPIRV/libSPIRV/SPIRVType.h | 8 + .../array_of_matrices.ll | 436 +++++++++++++++++ .../array_of_matrices.ll | 437 ++++++++++++++++++ 3 files changed, 881 insertions(+) create mode 100644 test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll create mode 100644 test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll diff --git a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index 834ee7f933..11b59016d8 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -1116,6 +1116,10 @@ class SPIRVTypeJointMatrixINTEL : public SPIRVType { return Args.size() > 5 ? Args[5] : nullptr; return Args.size() > 4 ? 
Args[4] : nullptr; } + + std::vector getNonLiteralOperands() const override { + return std::vector(1, CompType); + } }; class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { @@ -1144,6 +1148,10 @@ class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { SPIRVValue *getRows() const { return Args[1]; } SPIRVValue *getColumns() const { return Args[2]; } SPIRVValue *getUse() const { return Args[3]; } + + std::vector getNonLiteralOperands() const override { + return std::vector(1, CompType); + } }; } // namespace SPIRV diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll new file mode 100644 index 0000000000..0571af5dd9 --- /dev/null +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/array_of_matrices.ll @@ -0,0 +1,436 @@ +;; Compiled from joint_matrix_bf16_fill_k_cache.cpp from https://github.com/intel/llvm +;; command: clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp -fsycl-device-only -o test.bc + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability JointMatrixINTEL +; CHECK-SPIRV-DAG: Extension "SPV_INTEL_joint_matrix" +; CHECK-SPIRV: TypeInt [[#Int16Ty:]] 16 0 +; CHECK-SPIRV: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy1:]] [[#FloatTy]] +; CHECK-SPIRV: TypeStruct [[#StructTy1:]] [[#MatTy1]] +; CHECK-SPIRV: TypeArray [[#ArrayTy1:]] [[#StructTy1]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy1]] [[#]] +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy2:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy2:]] [[#MatTy2]] +; CHECK-SPIRV: TypeArray [[#ArrayTy2:]] [[#StructTy2]] [[#]] +; 
CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy2]] [[#]] +; CHECK-SPIRV: TypeJointMatrixINTEL [[#MatTy3:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy3:]] [[#MatTy3]] +; CHECK-SPIRV: TypeArray [[#ArrayTy3:]] [[#StructTy3]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy3]] [[#]] + +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) } +; CHECK-LLVM: alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]] + +; ModuleID = 'test.bc' +source_filename = "llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::__generated_multi_ptr" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.0" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.1" = type { ptr addrspace(1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) } 
+%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + +$_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_(ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr") align 8 %_arg_pA, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.0") align 8 %_arg_pB, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.1") align 8 %_arg_pC) local_unnamed_addr #0 comdat !srcloc !59 !kernel_arg_buffer_location !60 !intel_reqd_sub_group_size !61 !sycl_fixed_targets !62 !sycl_kernel_omit_args !63 { +entry: + call void @__itt_offload_wi_start_wrapper() + %tC.i = alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], align 8 + %tA.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], align 8 + %tB.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], align 8 + %0 = load i64, ptr %_arg_pA, align 8, !tbaa !64 + %1 = inttoptr i64 %0 to ptr addrspace(1) + %2 = load i64, ptr %_arg_pB, align 8, !tbaa !64 + %3 = inttoptr i64 %2 to ptr addrspace(1) + %4 = load i64, ptr %_arg_pC, align 8, !tbaa !64 + %5 = inttoptr i64 %4 to ptr addrspace(1) + %6 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, i64 8), align 8, !noalias !68 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32, 
!noalias !68 + %8 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !75 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !75 + %cmp.i.i = icmp ult i64 %6, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i208.i = icmp ult i64 %7, 2147483648 + tail call void @llvm.assume(i1 %cmp.i208.i) + %cmp.i209.i = icmp ult i64 %8, 2147483648 + tail call void @llvm.assume(i1 %cmp.i209.i) + %cmp.i212.i = icmp ult i64 %9, 2147483648 + tail call void @llvm.assume(i1 %cmp.i212.i) + %div205.i = lshr i64 %9, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tC.i) #4 + br label %arrayctor.loop.i + +arrayctor.loop.i: ; preds = %arrayctor.loop.i, %entry + %arrayctor.cur.idx.i = phi i64 [ 0, %entry ], [ %arrayctor.cur.add.i, %arrayctor.loop.i ] + %arrayctor.cur.add.i = add nuw nsw i64 %arrayctor.cur.idx.i, 1 + %arrayctor.done.i = icmp eq i64 %arrayctor.cur.add.i, 16 + br i1 %arrayctor.done.i, label %for.cond.i, label %arrayctor.loop.i + +for.cond.i: ; preds = %arrayctor.loop.i, %for.cond.cleanup7.i + %m.0.i = phi i32 [ %inc12.i, %for.cond.cleanup7.i ], [ 0, %arrayctor.loop.i ] + %cmp.i = icmp ult i32 %m.0.i, 4 + br i1 %cmp.i, label %for.cond5.preheader.i, label %for.cond14.preheader.i + +for.cond5.preheader.i: ; preds = %for.cond.i + %idxprom.i = zext i32 %m.0.i to i64 + br label %for.cond5.i + +for.cond14.preheader.i: ; preds = %for.cond.i + %mul50.i = shl nuw nsw i64 %6, 8 + %mul51.i = shl nuw nsw i64 %8, 5 + %add52.i = add nuw nsw i64 %mul50.i, %mul51.i + %mul80.i = shl nuw nsw i64 %div205.i, 7 + %10 = shl nuw nsw i64 %7, 9 + %11 = add nuw nsw i64 %10, %mul80.i + br label %for.cond14.i + +for.cond5.i: ; preds = %for.body8.i, %for.cond5.preheader.i + %n.0.i = phi i32 [ %inc.i, %for.body8.i ], [ 0, %for.cond5.preheader.i ] + %cmp6.i = icmp ult i32 %n.0.i, 4 + br i1 %cmp6.i, label %for.body8.i, label %for.cond.cleanup7.i + +for.cond.cleanup7.i: 
; preds = %for.cond5.i + %inc12.i = add nuw nsw i32 %m.0.i, 1 + br label %for.cond.i, !llvm.loop !80 + +for.body8.i: ; preds = %for.cond5.i + %conv.i = zext i32 %n.0.i to i64 + %arrayidx10.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom.i, i64 %conv.i + %call.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 + store target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 + %inc.i = add nuw nsw i32 %n.0.i, 1 + br label %for.cond5.i, !llvm.loop !84 + +for.cond14.i: ; preds = %for.cond.cleanup34.i, %for.cond14.preheader.i + %k2.0.i = phi i32 [ %inc129.i, %for.cond.cleanup34.i ], [ 0, %for.cond14.preheader.i ] + %cmp15.i = icmp ult i32 %k2.0.i, 8 + br i1 %cmp15.i, label %for.body17.i, label %for.cond132.preheader.i + +for.cond132.preheader.i: ; preds = %for.cond14.i + %mul156.i = shl nuw nsw i64 %7, 8 + %mul157.i = shl nuw nsw i64 %div205.i, 6 + %add158.i = add nuw nsw i64 %mul156.i, %mul157.i + br label %for.cond132.i + +for.body17.i: ; preds = %for.cond14.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tA.i) #4 + br label %arrayctor.loop20.i + +arrayctor.loop20.i: ; preds = %arrayctor.loop20.i, %for.body17.i + %arrayctor.cur21.idx.i = phi i64 [ 0, %for.body17.i ], [ %arrayctor.cur21.add.i, %arrayctor.loop20.i ] + %arrayctor.cur21.add.i = add nuw nsw i64 %arrayctor.cur21.idx.i, 1 + %arrayctor.done23.i = icmp eq i64 %arrayctor.cur21.add.i, 8 + br i1 %arrayctor.done23.i, label %arrayctor.cont24.i, label %arrayctor.loop20.i + +arrayctor.cont24.i: ; preds = %arrayctor.loop20.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tB.i) #4 + br label %arrayctor.loop27.i + 
+arrayctor.loop27.i: ; preds = %arrayctor.loop27.i, %arrayctor.cont24.i + %arrayctor.cur28.idx.i = phi i64 [ 0, %arrayctor.cont24.i ], [ %arrayctor.cur28.add.i, %arrayctor.loop27.i ] + %arrayctor.cur28.add.i = add nuw nsw i64 %arrayctor.cur28.idx.i, 1 + %arrayctor.done30.i = icmp eq i64 %arrayctor.cur28.add.i, 8 + br i1 %arrayctor.done30.i, label %for.cond32.preheader.i, label %arrayctor.loop27.i + +for.cond32.preheader.i: ; preds = %arrayctor.loop27.i + %12 = shl nuw i32 %k2.0.i, 1 + br label %for.cond32.i + +for.cond32.i: ; preds = %for.cond.cleanup92.i, %for.cond32.preheader.i + %k1.0.i = phi i32 [ %inc126.i, %for.cond.cleanup92.i ], [ 0, %for.cond32.preheader.i ] + %cmp33.i = icmp ult i32 %k1.0.i, 2 + br i1 %cmp33.i, label %for.body35.i, label %for.cond.cleanup34.i + +for.cond.cleanup34.i: ; preds = %for.cond32.i + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tB.i) #4 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tA.i) #4 + %inc129.i = add nuw nsw i32 %k2.0.i, 1 + br label %for.cond14.i, !llvm.loop !85 + +for.body35.i: ; preds = %for.cond32.i + %13 = add nuw i32 %12, %k1.0.i + %div37206.i = and i32 %13, 268435455 + %idxprom46.i = zext i32 %k1.0.i to i64 + %mul57.i = shl nuw nsw i32 %div37206.i, 4 + %conv58.i = zext i32 %mul57.i to i64 + %invariant.gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %conv58.i + br label %for.cond39.i + +for.cond39.i: ; preds = %for.body42.i, %for.body35.i + %m38.0.i = phi i32 [ 0, %for.body35.i ], [ %inc60.i, %for.body42.i ] + %cmp40.i = icmp ult i32 %m38.0.i, 4 + br i1 %cmp40.i, label %for.body42.i, label %for.cond63.preheader.i + +for.cond63.preheader.i: ; preds = %for.cond39.i + %mul77.i = shl nuw nsw i32 %div37206.i, 12 + %conv78.i = zext i32 %mul77.i to i64 + %add.ptr.i225.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %3, i64 %conv78.i + br label %for.cond63.i + +for.body42.i: ; preds = %for.cond39.i + %idxprom44.i = zext 
i32 %m38.0.i to i64 + %arrayidx47.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom44.i, i64 %idxprom46.i + %mul53.i = shl nuw nsw i32 %m38.0.i, 3 + %conv54.i = zext i32 %mul53.i to i64 + %add55.i = add nuw nsw i64 %add52.i, %conv54.i + %mul56.i = shl nuw nsw i64 %add55.i, 8 + %gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul56.i + %call1.i.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) %call1.i.i, ptr %arrayidx47.i, align 8, !tbaa !86 + %inc60.i = add nuw nsw i32 %m38.0.i, 1 + br label %for.cond39.i, !llvm.loop !88 + +for.cond63.i: ; preds = %for.body67.i, %for.cond63.preheader.i + %n62.0.i = phi i32 [ %inc87.i, %for.body67.i ], [ 0, %for.cond63.preheader.i ] + %cmp65.i = icmp ult i32 %n62.0.i, 4 + br i1 %cmp65.i, label %for.body67.i, label %for.cond90.i + +for.body67.i: ; preds = %for.cond63.i + %conv64.i = zext i32 %n62.0.i to i64 + %arrayidx72.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv64.i, i64 %idxprom46.i + %14 = shl nuw nsw i64 %conv64.i, 5 + %mul85.i = add nuw nsw i64 %14, %11 + %add.ptr.i226.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i225.i, i64 %mul85.i + %call1.i219.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) 
@_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 + %inc87.i = add nuw nsw i32 %n62.0.i, 1 + br label %for.cond63.i, !llvm.loop !91 + +for.cond90.i: ; preds = %for.cond63.i, %for.cond.cleanup98.i + %m89.0.i = phi i32 [ %inc123.i, %for.cond.cleanup98.i ], [ 0, %for.cond63.i ] + %cmp91.i = icmp ult i32 %m89.0.i, 4 + br i1 %cmp91.i, label %for.cond95.preheader.i, label %for.cond.cleanup92.i + +for.cond95.preheader.i: ; preds = %for.cond90.i + %idxprom102.i = zext i32 %m89.0.i to i64 + %arrayidx105.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom102.i, i64 %idxprom46.i + %15 = load target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 + br label %for.cond95.i + +for.cond.cleanup92.i: ; preds = %for.cond90.i + %inc126.i = add nuw nsw i32 %k1.0.i, 1 + br label %for.cond32.i, !llvm.loop !95 + +for.cond95.i: ; preds = %for.body99.i, %for.cond95.preheader.i + %n94.0.i = phi i32 [ %inc120.i, %for.body99.i ], [ 0, %for.cond95.preheader.i ] + %cmp97.i = icmp ult i32 %n94.0.i, 4 + br i1 %cmp97.i, label %for.body99.i, label %for.cond.cleanup98.i + +for.cond.cleanup98.i: ; preds = %for.cond95.i + %inc123.i = add nuw nsw i32 %m89.0.i, 1 + br label %for.cond90.i, !llvm.loop !96 + +for.body99.i: ; preds = %for.cond95.i + %conv96.i = zext i32 %n94.0.i to i64 + %arrayidx109.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv96.i, i64 %idxprom46.i + %arrayidx113.i 
= getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom102.i, i64 %conv96.i + %16 = load target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 + %17 = load target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 + %call.i221.i = tail call spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef %15, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef %16, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %17, i32 noundef 3) #5, !noalias !92 + store target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 + %inc120.i = add nuw nsw i32 %n94.0.i, 1 + br label %for.cond95.i, !llvm.loop !97 + +for.cond132.i: ; preds = %for.cond.cleanup140.i, %for.cond132.preheader.i + %m131.0.i = phi i32 [ %inc166.i, %for.cond.cleanup140.i ], [ 0, %for.cond132.preheader.i ] + %cmp133.i = icmp ult i32 %m131.0.i, 4 + br i1 %cmp133.i, label %for.cond137.preheader.i, label %_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit + +for.cond137.preheader.i: ; preds = %for.cond132.i + %idxprom143.i = zext i32 %m131.0.i to i64 + %mul152.i = shl nuw nsw i32 %m131.0.i, 3 + %conv153.i = zext i32 %mul152.i to i64 + %add154.i = add nuw nsw i64 %add52.i, %conv153.i + %mul155.i = shl nuw nsw i64 %add154.i, 8 + %add.ptr.i227.i = 
getelementptr inbounds float, ptr addrspace(1) %5, i64 %mul155.i + br label %for.cond137.i + +for.cond137.i: ; preds = %for.body141.i, %for.cond137.preheader.i + %n136.0.i = phi i32 [ %inc163.i, %for.body141.i ], [ 0, %for.cond137.preheader.i ] + %cmp139.i = icmp ult i32 %n136.0.i, 4 + br i1 %cmp139.i, label %for.body141.i, label %for.cond.cleanup140.i + +for.cond.cleanup140.i: ; preds = %for.cond137.i + %inc166.i = add nuw nsw i32 %m131.0.i, 1 + br label %for.cond132.i, !llvm.loop !98 + +for.body141.i: ; preds = %for.cond137.i + %conv138.i = zext i32 %n136.0.i to i64 + %arrayidx146.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom143.i, i64 %conv138.i + %mul160.i = shl nuw nsw i64 %conv138.i, 4 + %add161.i = add nuw nsw i64 %add158.i, %mul160.i + %add.ptr.i228.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i227.i, i64 %add161.i + %18 = load target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2), ptr %arrayidx146.i, align 8, !tbaa !82 + tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + %inc163.i = add nuw nsw i32 %n136.0.i, 1 + br label %for.cond137.i, !llvm.loop !99 + +_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond132.i + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tC.i) #4 + call void @__itt_offload_wi_finish_wrapper() + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 
immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) @_Z28__spirv_JointMatrixLoadINTELIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) 
@_Z27__spirv_JointMatrixMadINTELIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.JointMatrixINTEL", i16, 8, 16, 0, 3, 0, 1) noundef, target("spirv.JointMatrixINTEL", i16, 16, 16, 2, 3, 1, 1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_JointMatrixINTELIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.JointMatrixINTEL", float, 8, 16, 3, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
+ +declare void @__itt_offload_wi_start_wrapper() + +declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #3 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57} +!llvm.ident = !{!58} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = 
!{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ 
TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll new file mode 100644 index 0000000000..64265ab19c --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll @@ -0,0 +1,437 @@ +;; Compiled from joint_matrix_bf16_fill_k_cache.cpp from https://github.com/intel/llvm +;; command: clang++ -fsycl -DSYCL_EXT_ONEAPI_MATRIX_VERSION=4 llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp -fsycl-device-only -o test.bc +;; and then JointMatrixINTEL target ext type was replaced with CooperativeMatrixKHR + +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV-DAG: Capability CooperativeMatrixKHR +; CHECK-SPIRV-DAG: Extension "SPV_KHR_cooperative_matrix" +; CHECK-SPIRV: TypeInt [[#Int16Ty:]] 16 0 +; CHECK-SPIRV: TypeFloat [[#FloatTy:]] 32 +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#FloatTy]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy2:]] 
[[#Int16Ty]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int16Ty]] +; CHECK-SPIRV: TypeStruct [[#StructTy1:]] [[#MatTy1]] +; CHECK-SPIRV: TypeArray [[#ArrayTy1:]] [[#StructTy1]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy1]] [[#]] +; CHECK-SPIRV: TypeStruct [[#StructTy2:]] [[#MatTy2]] +; CHECK-SPIRV: TypeArray [[#ArrayTy2:]] [[#StructTy2]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy2]] [[#]] +; CHECK-SPIRV: TypeStruct [[#StructTy3:]] [[#MatTy3]] +; CHECK-SPIRV: TypeArray [[#ArrayTy3:]] [[#StructTy3]] [[#]] +; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy3]] [[#]] + +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) } +; CHECK-LLVM: alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]] +; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]] + +; ModuleID = 'test.bc' +source_filename = "llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::__generated_multi_ptr" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.0" = type { ptr addrspace(1) } +%"class.sycl::_V1::__generated_multi_ptr.1" = type { ptr addrspace(1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) } 
+%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) } +%"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } + +$_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_ = comdat any + +@__spirv_BuiltInWorkgroupId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: convergent norecurse nounwind +define weak_odr dso_local spir_kernel void @_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_(ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr") align 8 %_arg_pA, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.0") align 8 %_arg_pB, ptr noundef byval(%"class.sycl::_V1::__generated_multi_ptr.1") align 8 %_arg_pC) local_unnamed_addr #0 comdat !srcloc !59 !kernel_arg_buffer_location !60 !intel_reqd_sub_group_size !61 !sycl_fixed_targets !62 !sycl_kernel_omit_args !63 { +entry: + call void @__itt_offload_wi_start_wrapper() + %tC.i = alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], align 8 + %tA.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], align 8 + %tB.i = alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], align 8 + %0 = load i64, ptr %_arg_pA, align 8, !tbaa !64 + %1 = inttoptr i64 %0 to ptr addrspace(1) + %2 = load i64, ptr %_arg_pB, align 8, !tbaa !64 + %3 = inttoptr i64 %2 to ptr addrspace(1) + %4 = load i64, ptr %_arg_pC, 
align 8, !tbaa !64 + %5 = inttoptr i64 %4 to ptr addrspace(1) + %6 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, i64 8), align 8, !noalias !68 + %7 = load i64, ptr addrspace(1) @__spirv_BuiltInWorkgroupId, align 32, !noalias !68 + %8 = load i64, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, i64 8), align 8, !noalias !75 + %9 = load i64, ptr addrspace(1) @__spirv_BuiltInLocalInvocationId, align 32, !noalias !75 + %cmp.i.i = icmp ult i64 %6, 2147483648 + tail call void @llvm.assume(i1 %cmp.i.i) + %cmp.i208.i = icmp ult i64 %7, 2147483648 + tail call void @llvm.assume(i1 %cmp.i208.i) + %cmp.i209.i = icmp ult i64 %8, 2147483648 + tail call void @llvm.assume(i1 %cmp.i209.i) + %cmp.i212.i = icmp ult i64 %9, 2147483648 + tail call void @llvm.assume(i1 %cmp.i212.i) + %div205.i = lshr i64 %9, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tC.i) #4 + br label %arrayctor.loop.i + +arrayctor.loop.i: ; preds = %arrayctor.loop.i, %entry + %arrayctor.cur.idx.i = phi i64 [ 0, %entry ], [ %arrayctor.cur.add.i, %arrayctor.loop.i ] + %arrayctor.cur.add.i = add nuw nsw i64 %arrayctor.cur.idx.i, 1 + %arrayctor.done.i = icmp eq i64 %arrayctor.cur.add.i, 16 + br i1 %arrayctor.done.i, label %for.cond.i, label %arrayctor.loop.i + +for.cond.i: ; preds = %arrayctor.loop.i, %for.cond.cleanup7.i + %m.0.i = phi i32 [ %inc12.i, %for.cond.cleanup7.i ], [ 0, %arrayctor.loop.i ] + %cmp.i = icmp ult i32 %m.0.i, 4 + br i1 %cmp.i, label %for.cond5.preheader.i, label %for.cond14.preheader.i + +for.cond5.preheader.i: ; preds = %for.cond.i + %idxprom.i = zext i32 %m.0.i to i64 + br label %for.cond5.i + +for.cond14.preheader.i: ; preds = %for.cond.i + %mul50.i = shl nuw nsw i64 %6, 8 + %mul51.i = shl nuw nsw i64 %8, 5 + %add52.i = add nuw nsw i64 %mul50.i, %mul51.i + %mul80.i = shl nuw nsw i64 %div205.i, 7 + %10 = shl nuw nsw i64 %7, 9 + %11 = add nuw nsw i64 %10, %mul80.i + br label 
%for.cond14.i + +for.cond5.i: ; preds = %for.body8.i, %for.cond5.preheader.i + %n.0.i = phi i32 [ %inc.i, %for.body8.i ], [ 0, %for.cond5.preheader.i ] + %cmp6.i = icmp ult i32 %n.0.i, 4 + br i1 %cmp6.i, label %for.body8.i, label %for.cond.cleanup7.i + +for.cond.cleanup7.i: ; preds = %for.cond5.i + %inc12.i = add nuw nsw i32 %m.0.i, 1 + br label %for.cond.i, !llvm.loop !80 + +for.body8.i: ; preds = %for.cond5.i + %conv.i = zext i32 %n.0.i to i64 + %arrayidx10.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom.i, i64 %conv.i + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 + store target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 + %inc.i = add nuw nsw i32 %n.0.i, 1 + br label %for.cond5.i, !llvm.loop !84 + +for.cond14.i: ; preds = %for.cond.cleanup34.i, %for.cond14.preheader.i + %k2.0.i = phi i32 [ %inc129.i, %for.cond.cleanup34.i ], [ 0, %for.cond14.preheader.i ] + %cmp15.i = icmp ult i32 %k2.0.i, 8 + br i1 %cmp15.i, label %for.body17.i, label %for.cond132.preheader.i + +for.cond132.preheader.i: ; preds = %for.cond14.i + %mul156.i = shl nuw nsw i64 %7, 8 + %mul157.i = shl nuw nsw i64 %div205.i, 6 + %add158.i = add nuw nsw i64 %mul156.i, %mul157.i + br label %for.cond132.i + +for.body17.i: ; preds = %for.cond14.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tA.i) #4 + br label %arrayctor.loop20.i + +arrayctor.loop20.i: ; preds = %arrayctor.loop20.i, %for.body17.i + %arrayctor.cur21.idx.i = phi i64 [ 0, %for.body17.i ], [ %arrayctor.cur21.add.i, %arrayctor.loop20.i ] + %arrayctor.cur21.add.i = add nuw nsw i64 %arrayctor.cur21.idx.i, 1 + %arrayctor.done23.i = 
icmp eq i64 %arrayctor.cur21.add.i, 8 + br i1 %arrayctor.done23.i, label %arrayctor.cont24.i, label %arrayctor.loop20.i + +arrayctor.cont24.i: ; preds = %arrayctor.loop20.i + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %tB.i) #4 + br label %arrayctor.loop27.i + +arrayctor.loop27.i: ; preds = %arrayctor.loop27.i, %arrayctor.cont24.i + %arrayctor.cur28.idx.i = phi i64 [ 0, %arrayctor.cont24.i ], [ %arrayctor.cur28.add.i, %arrayctor.loop27.i ] + %arrayctor.cur28.add.i = add nuw nsw i64 %arrayctor.cur28.idx.i, 1 + %arrayctor.done30.i = icmp eq i64 %arrayctor.cur28.add.i, 8 + br i1 %arrayctor.done30.i, label %for.cond32.preheader.i, label %arrayctor.loop27.i + +for.cond32.preheader.i: ; preds = %arrayctor.loop27.i + %12 = shl nuw i32 %k2.0.i, 1 + br label %for.cond32.i + +for.cond32.i: ; preds = %for.cond.cleanup92.i, %for.cond32.preheader.i + %k1.0.i = phi i32 [ %inc126.i, %for.cond.cleanup92.i ], [ 0, %for.cond32.preheader.i ] + %cmp33.i = icmp ult i32 %k1.0.i, 2 + br i1 %cmp33.i, label %for.body35.i, label %for.cond.cleanup34.i + +for.cond.cleanup34.i: ; preds = %for.cond32.i + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tB.i) #4 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %tA.i) #4 + %inc129.i = add nuw nsw i32 %k2.0.i, 1 + br label %for.cond14.i, !llvm.loop !85 + +for.body35.i: ; preds = %for.cond32.i + %13 = add nuw i32 %12, %k1.0.i + %div37206.i = and i32 %13, 268435455 + %idxprom46.i = zext i32 %k1.0.i to i64 + %mul57.i = shl nuw nsw i32 %div37206.i, 4 + %conv58.i = zext i32 %mul57.i to i64 + %invariant.gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %1, i64 %conv58.i + br label %for.cond39.i + +for.cond39.i: ; preds = %for.body42.i, %for.body35.i + %m38.0.i = phi i32 [ 0, %for.body35.i ], [ %inc60.i, %for.body42.i ] + %cmp40.i = icmp ult i32 %m38.0.i, 4 + br i1 %cmp40.i, label %for.body42.i, label %for.cond63.preheader.i + +for.cond63.preheader.i: ; preds = %for.cond39.i + %mul77.i = shl nuw 
nsw i32 %div37206.i, 12 + %conv78.i = zext i32 %mul77.i to i64 + %add.ptr.i225.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %3, i64 %conv78.i + br label %for.cond63.i + +for.body42.i: ; preds = %for.cond39.i + %idxprom44.i = zext i32 %m38.0.i to i64 + %arrayidx47.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom44.i, i64 %idxprom46.i + %mul53.i = shl nuw nsw i32 %m38.0.i, 3 + %conv54.i = zext i32 %mul53.i to i64 + %add55.i = add nuw nsw i64 %add52.i, %conv54.i + %mul56.i = shl nuw nsw i64 %add55.i, 8 + %gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul56.i + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) %call1.i.i, ptr %arrayidx47.i, align 8, !tbaa !86 + %inc60.i = add nuw nsw i32 %m38.0.i, 1 + br label %for.cond39.i, !llvm.loop !88 + +for.cond63.i: ; preds = %for.body67.i, %for.cond63.preheader.i + %n62.0.i = phi i32 [ %inc87.i, %for.body67.i ], [ 0, %for.cond63.preheader.i ] + %cmp65.i = icmp ult i32 %n62.0.i, 4 + br i1 %cmp65.i, label %for.body67.i, label %for.cond90.i + +for.body67.i: ; preds = %for.cond63.i + %conv64.i = zext i32 %n62.0.i to i64 + %arrayidx72.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv64.i, i64 %idxprom46.i + %14 = shl nuw nsw i64 %conv64.i, 5 + %mul85.i = add nuw nsw i64 %14, %11 + %add.ptr.i226.i = getelementptr inbounds 
%"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i225.i, i64 %mul85.i + %call1.i219.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 + %inc87.i = add nuw nsw i32 %n62.0.i, 1 + br label %for.cond63.i, !llvm.loop !91 + +for.cond90.i: ; preds = %for.cond63.i, %for.cond.cleanup98.i + %m89.0.i = phi i32 [ %inc123.i, %for.cond.cleanup98.i ], [ 0, %for.cond63.i ] + %cmp91.i = icmp ult i32 %m89.0.i, 4 + br i1 %cmp91.i, label %for.cond95.preheader.i, label %for.cond.cleanup92.i + +for.cond95.preheader.i: ; preds = %for.cond90.i + %idxprom102.i = zext i32 %m89.0.i to i64 + %arrayidx105.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom102.i, i64 %idxprom46.i + %15 = load target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 + br label %for.cond95.i + +for.cond.cleanup92.i: ; preds = %for.cond90.i + %inc126.i = add nuw nsw i32 %k1.0.i, 1 + br label %for.cond32.i, !llvm.loop !95 + +for.cond95.i: ; preds = %for.body99.i, %for.cond95.preheader.i + %n94.0.i = phi i32 [ %inc120.i, %for.body99.i ], [ 0, %for.cond95.preheader.i ] + %cmp97.i = icmp ult i32 %n94.0.i, 4 + br i1 %cmp97.i, label %for.body99.i, label %for.cond.cleanup98.i + +for.cond.cleanup98.i: ; preds = %for.cond95.i + %inc123.i = add nuw nsw i32 %m89.0.i, 1 + br label %for.cond90.i, !llvm.loop !96 + +for.body99.i: ; preds = %for.cond95.i + %conv96.i = zext i32 %n94.0.i to 
i64 + %arrayidx109.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv96.i, i64 %idxprom46.i + %arrayidx113.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom102.i, i64 %conv96.i + %16 = load target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 + %17 = load target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 + %call.i221.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) @_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) noundef %15, target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) noundef %16, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef %17, i32 noundef 3) #5, !noalias !92 + store target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 + %inc120.i = add nuw nsw i32 %n94.0.i, 1 + br label %for.cond95.i, !llvm.loop !97 + +for.cond132.i: ; preds = %for.cond.cleanup140.i, %for.cond132.preheader.i + %m131.0.i = phi i32 [ %inc166.i, %for.cond.cleanup140.i ], [ 0, %for.cond132.preheader.i ] + %cmp133.i = icmp ult i32 %m131.0.i, 4 + br i1 %cmp133.i, label %for.cond137.preheader.i, label %_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit + +for.cond137.preheader.i: ; preds = %for.cond132.i + %idxprom143.i = zext i32 %m131.0.i to 
i64 + %mul152.i = shl nuw nsw i32 %m131.0.i, 3 + %conv153.i = zext i32 %mul152.i to i64 + %add154.i = add nuw nsw i64 %add52.i, %conv153.i + %mul155.i = shl nuw nsw i64 %add154.i, 8 + %add.ptr.i227.i = getelementptr inbounds float, ptr addrspace(1) %5, i64 %mul155.i + br label %for.cond137.i + +for.cond137.i: ; preds = %for.body141.i, %for.cond137.preheader.i + %n136.0.i = phi i32 [ %inc163.i, %for.body141.i ], [ 0, %for.cond137.preheader.i ] + %cmp139.i = icmp ult i32 %n136.0.i, 4 + br i1 %cmp139.i, label %for.body141.i, label %for.cond.cleanup140.i + +for.cond.cleanup140.i: ; preds = %for.cond137.i + %inc166.i = add nuw nsw i32 %m131.0.i, 1 + br label %for.cond132.i, !llvm.loop !98 + +for.body141.i: ; preds = %for.cond137.i + %conv138.i = zext i32 %n136.0.i to i64 + %arrayidx146.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom143.i, i64 %conv138.i + %mul160.i = shl nuw nsw i64 %conv138.i, 4 + %add161.i = add nuw nsw i64 %add158.i, %mul160.i + %add.ptr.i228.i = getelementptr inbounds float, ptr addrspace(1) %add.ptr.i227.i, i64 %add161.i + %18 = load target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2), ptr %arrayidx146.i, align 8, !tbaa !82 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + %inc163.i = add nuw nsw i32 %n136.0.i, 1 + br label %for.cond137.i, !llvm.loop !99 + +_ZZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_ENKUlNS1_7nd_itemILi2EEEE_clESF_.exit: ; preds = %for.cond132.i + call void @llvm.lifetime.end.p0(i64 128, ptr 
nonnull %tC.i) #4 + call void @__itt_offload_wi_finish_wrapper() + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #2 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) 
@_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) noundef, target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) noundef, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef, i32 noundef) local_unnamed_addr #3 + +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) 
+ +declare void @__itt_offload_wi_start_wrapper() + +declare void @__itt_offload_wi_finish_wrapper() + +attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } +attributes #3 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #4 = { nounwind } +attributes #5 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1} +!opencl.spir.version = !{!2} +!spirv.Source = !{!3} +!sycl_aspects = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52, !53, !54, !55, !56, !57} +!llvm.ident = !{!58} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{i32 1, i32 2} +!3 = !{i32 4, i32 100000} +!4 = !{!"cpu", i32 1} +!5 = !{!"gpu", i32 2} +!6 = !{!"accelerator", i32 3} +!7 = !{!"custom", i32 4} +!8 = !{!"fp16", i32 5} +!9 = !{!"fp64", i32 6} +!10 = !{!"image", i32 9} +!11 = !{!"online_compiler", i32 10} +!12 = !{!"online_linker", i32 11} +!13 = !{!"queue_profiling", i32 12} +!14 = !{!"usm_device_allocations", i32 13} +!15 = !{!"usm_host_allocations", i32 14} +!16 = !{!"usm_shared_allocations", i32 15} +!17 = !{!"usm_system_allocations", i32 17} +!18 = !{!"ext_intel_pci_address", i32 18} +!19 = !{!"ext_intel_gpu_eu_count", i32 19} +!20 = !{!"ext_intel_gpu_eu_simd_width", i32 20} +!21 = !{!"ext_intel_gpu_slices", i32 21} +!22 = !{!"ext_intel_gpu_subslices_per_slice", i32 22} +!23 = 
!{!"ext_intel_gpu_eu_count_per_subslice", i32 23} +!24 = !{!"ext_intel_max_mem_bandwidth", i32 24} +!25 = !{!"ext_intel_mem_channel", i32 25} +!26 = !{!"usm_atomic_host_allocations", i32 26} +!27 = !{!"usm_atomic_shared_allocations", i32 27} +!28 = !{!"atomic64", i32 28} +!29 = !{!"ext_intel_device_info_uuid", i32 29} +!30 = !{!"ext_oneapi_srgb", i32 30} +!31 = !{!"ext_oneapi_native_assert", i32 31} +!32 = !{!"host_debuggable", i32 32} +!33 = !{!"ext_intel_gpu_hw_threads_per_eu", i32 33} +!34 = !{!"ext_oneapi_cuda_async_barrier", i32 34} +!35 = !{!"ext_oneapi_bfloat16_math_functions", i32 35} +!36 = !{!"ext_intel_free_memory", i32 36} +!37 = !{!"ext_intel_device_id", i32 37} +!38 = !{!"ext_intel_memory_clock_rate", i32 38} +!39 = !{!"ext_intel_memory_bus_width", i32 39} +!40 = !{!"emulated", i32 40} +!41 = !{!"ext_intel_legacy_image", i32 41} +!42 = !{!"ext_oneapi_bindless_images", i32 42} +!43 = !{!"ext_oneapi_bindless_images_shared_usm", i32 43} +!44 = !{!"ext_oneapi_bindless_images_1d_usm", i32 44} +!45 = !{!"ext_oneapi_bindless_images_2d_usm", i32 45} +!46 = !{!"ext_oneapi_interop_memory_import", i32 46} +!47 = !{!"ext_oneapi_interop_memory_export", i32 47} +!48 = !{!"ext_oneapi_interop_semaphore_import", i32 48} +!49 = !{!"ext_oneapi_interop_semaphore_export", i32 49} +!50 = !{!"ext_oneapi_mipmap", i32 50} +!51 = !{!"ext_oneapi_mipmap_anisotropy", i32 51} +!52 = !{!"ext_oneapi_mipmap_level_reference", i32 52} +!53 = !{!"int64_base_atomics", i32 7} +!54 = !{!"int64_extended_atomics", i32 8} +!55 = !{!"usm_system_allocator", i32 17} +!56 = !{!"usm_restricted_shared_allocations", i32 16} +!57 = !{!"host", i32 0} +!58 = !{!"clang version 18.0.0 (https://github.com/intel/llvm.git cc440821c30daabef517c7c8ff75546719f8094c)"} +!59 = !{i32 242145} +!60 = !{i32 -1, i32 -1, i32 -1} +!61 = !{i32 16} +!62 = !{} +!63 = !{i1 false, i1 false, i1 false} +!64 = !{!65, !65, i64 0} +!65 = !{!"any pointer", !66, i64 0} +!66 = !{!"omnipotent char", !67, i64 0} +!67 = !{!"Simple C++ 
TBAA"} +!68 = !{!69, !71, !73} +!69 = distinct !{!69, !70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!70 = distinct !{!70, !"_ZN7__spirv22InitSizesSTWorkgroupIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!71 = distinct !{!71, !72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!72 = distinct !{!72, !"_ZN7__spirv15initWorkgroupIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!73 = distinct !{!73, !74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_: %agg.result"} +!74 = distinct !{!74, !"_ZN4sycl3_V16detail7Builder10getElementILi2EEEKNS0_7nd_itemIXT_EEEPS5_"} +!75 = !{!76, !78, !73} +!76 = distinct !{!76, !77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv: %agg.result"} +!77 = distinct !{!77, !"_ZN7__spirv28InitSizesSTLocalInvocationIdILi2EN4sycl3_V12idILi2EEEE8initSizeEv"} +!78 = distinct !{!78, !79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v: %agg.result"} +!79 = distinct !{!79, !"_ZN7__spirv21initLocalInvocationIdILi2EN4sycl3_V12idILi2EEEEET0_v"} +!80 = distinct !{!80, !81} +!81 = !{!"llvm.loop.mustprogress"} +!82 = !{!83, !65, i64 0} +!83 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupEfLNS4_3useE2ELm8ELm16ELNS4_6layoutE3EEE", !65, i64 0} +!84 = distinct !{!84, !81} +!85 = distinct !{!85, !81} +!86 = !{!87, !65, i64 0} +!87 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE0ELm8ELm16ELNS4_6layoutE0EEE", !65, i64 0} +!88 = distinct !{!88, !81} +!89 = !{!90, !65, i64 0} +!90 = !{!"_ZTSN4sycl3_V13ext6oneapi12experimental6matrix12joint_matrixINS0_9sub_groupENS2_8bfloat16ELNS4_3useE1ELm16ELm16ELNS4_6layoutE2EEE", !65, i64 0} +!91 = distinct !{!91, !81} +!92 = !{!93} +!93 = distinct !{!93, !94, 
!"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_: %agg.result"} +!94 = distinct !{!94, !"_ZN4sycl3_V13ext6oneapi12experimental6matrix16joint_matrix_madINS0_9sub_groupENS2_8bfloat16ES7_fLm8ELm16ELm16ELNS4_6layoutE0ELS8_2EEENS4_12joint_matrixIT_T2_LNS4_3useE2EXT3_EXT5_ELS8_3EEESA_RNS9_ISA_T0_LSC_0EXT3_EXT4_EXT6_EEERNS9_ISA_T1_LSC_1EXT4_EXT5_EXT7_EEERSD_"} +!95 = distinct !{!95, !81} +!96 = distinct !{!96, !81} +!97 = distinct !{!97, !81} +!98 = distinct !{!98, !81} +!99 = distinct !{!99, !81} From 5c098816a91b93e3eabcd7b9b8bae38a8ce4b77b Mon Sep 17 00:00:00 2001 From: Viktoria Maximova Date: Fri, 25 Aug 2023 16:09:21 +0200 Subject: [PATCH 09/12] Test conversion instructions for TypeCooperativeMatrixKHR (#2137) (cherry picked from commit 5d4ebd153649feac1b68281f9198196daffd353b) --- .../conversion_instructions.ll | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll new file mode 100644 index 0000000000..bfc710a741 --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll @@ -0,0 +1,149 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv +; TODO: Validation is disabled till the moment the tools in CI are updated (passes locally) +; R/UN: spirv-val %t.spv +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV + +; RUN: llvm-spirv -r --spirv-target-env=SPV-IR %t.spv -o %t.rev.bc +; RUN: llvm-dis %t.rev.bc +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM 
+ +; CHECK-SPIRV: TypeInt [[#TypeInt32:]] 32 0 +; CHECK-SPIRV: TypeInt [[#TypeInt16:]] 16 0 +; CHECK-SPIRV: TypeInt [[#TypeInt8:]] 8 0 +; CHECK-SPIRV: TypeFloat [[#TypeFloat:]] 32 +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeFloat:]] [[#TypeFloat]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt32:]] [[#TypeInt32]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt16:]] [[#TypeInt16]] +; CHECK-SPIRV: TypeFloat [[#TypeFloat16:]] 16 +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeFloat16:]] [[#TypeFloat16]] +; CHECK-SPIRV: TypeCooperativeMatrixKHR [[#MatrixTypeInt8:]] [[#TypeInt8]] + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertFToU [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Matrix]]) + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertFToS [[#MatrixTypeInt32]] [[#]] 
[[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Matrix]]) + +define void @convert_f_to_s() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertSToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#Matrix]]) + +define void @convert_s_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) 
%0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: ConvertUToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#Matrix]]) + +define void @convert_u_to_f() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt32]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: UConvert [[#MatrixTypeInt8]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %[[#Matrix]]) + +define void @u_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) 
@_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt8]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: SConvert [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructc(i8 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) %[[#Matrix]]) + +define void @s_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt8(i8 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) %0) + ret void +} + +; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat16]] [[#MatrixIn:]] [[#]] {{$}} +; CHECK-SPIRV: FConvert [[#MatrixTypeFloat]] [[#]] [[#MatrixIn]] + +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructDh(half 0xH0000) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) %[[#Matrix]]) + +define void @f_convert() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) 
@_Z26__spirv_CompositeConstructHalf(half 0xH0000) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) %0) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructHalf(half noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt8(i8 noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) 
@_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) noundef) + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) noundef) + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 08d094a0e457360ad8b94b017d2dc277e697ca76)"} From 4d98fa90437a6b9b243b28c7537b68cf5a894ba8 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 23 Nov 2023 22:59:23 +0100 Subject: [PATCH 10/12] [Backport to 16] Add error checking for cooperative matrix use and scope parameters (#2223) Use should be: MatrixA, MatrixB or Accumulator. Scope must be at max Invocation (others are not supported by the translator). 
Signed-off-by: Sidorov, Dmitry (cherry picked from commit f18e64da496cc836628b5a4a2dd7646084333267) --- lib/SPIRV/libSPIRV/SPIRVType.cpp | 18 +++ lib/SPIRV/libSPIRV/SPIRVType.h | 3 + .../bf16_conversion_instructions.ll | 32 ++--- .../cooperative_matrix_apply.ll | 18 +-- .../cooperative_matrix_prefetch.ll | 54 ++++---- .../tf32_conversion_instructions.ll | 12 +- .../arithmetic_instructions.ll | 122 +++++++++--------- .../array_of_matrices.ll | 48 +++---- .../conversion_instructions.ll | 80 ++++++------ .../cooperative_matrix.ll | 55 ++++---- .../cooperative_matrix_wrong_scope.ll | 17 +++ .../cooperative_matrix_wrong_use.ll | 17 +++ 12 files changed, 266 insertions(+), 210 deletions(-) create mode 100644 test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll create mode 100644 test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll diff --git a/lib/SPIRV/libSPIRV/SPIRVType.cpp b/lib/SPIRV/libSPIRV/SPIRVType.cpp index f7ba79d3ec..51779146c9 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.cpp +++ b/lib/SPIRV/libSPIRV/SPIRVType.cpp @@ -337,4 +337,22 @@ void SPIRVTypeCooperativeMatrixKHR::decode(std::istream &I) { Decoder >> Id >> CompType >> Args; } +void SPIRVTypeCooperativeMatrixKHR::validate() const { + SPIRVEntry::validate(); + SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + SPIRVConstant *UseConst = static_cast(this->getUse()); + auto InstName = OpCodeNameMap::map(OC); + uint64_t UseValue = UseConst->getZExtIntValue(); + SPVErrLog.checkError( + (UseValue <= CooperativeMatrixUseMatrixAccumulatorKHR), + SPIRVEC_InvalidInstruction, + InstName + "\nIncorrect Use parameter, should be MatrixA, MatrixB or " + "Accumulator\n"); + SPIRVConstant *ScopeConst = static_cast(this->getScope()); + uint64_t ScopeValue = ScopeConst->getZExtIntValue(); + SPVErrLog.checkError((ScopeValue <= ScopeInvocation), + SPIRVEC_InvalidInstruction, + InstName + "\nUnsupported Scope parameter\n"); +} + } // namespace SPIRV diff --git 
a/lib/SPIRV/libSPIRV/SPIRVType.h b/lib/SPIRV/libSPIRV/SPIRVType.h index 11b59016d8..1e4789d799 100644 --- a/lib/SPIRV/libSPIRV/SPIRVType.h +++ b/lib/SPIRV/libSPIRV/SPIRVType.h @@ -1126,6 +1126,9 @@ class SPIRVTypeCooperativeMatrixKHR : public SPIRVType { SPIRVType *CompType; std::vector Args; +protected: + void validate() const override; + public: const static Op OC = OpTypeCooperativeMatrixKHR; const static SPIRVWord FixedWC = 7; diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll index eb1d1afe51..237c05688b 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/bf16_conversion_instructions.ll @@ -31,16 +31,16 @@ ; CHECK-SPIRV: CompositeConstruct [[#ShortMatTy]] [[#ShortMat:]] ; CHECK-SPIRV: ConvertBF16ToFINTEL [[#FP32MatTy]] [[#]] [[#ShortMat]] -; CHECK-OCL-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z32intel_convert_bfloat16_as_ushortPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#FP32Matrix]]) -; CHECK-OCL-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) -; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z31intel_convert_as_bfloat16_floatPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#ShortMatrix]]) +; CHECK-OCL-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-OCL-IR: call spir_func 
target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z32intel_convert_bfloat16_as_ushortPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#FP32Matrix]]) +; CHECK-OCL-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-OCL-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z31intel_convert_as_bfloat16_floatPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#ShortMatrix]]) -; CHECK-SPV-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#FP32Matrix]]) -; CHECK-SPV-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) -; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z27__spirv_ConvertBF16ToFINTELPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#ShortMatrix]]) +; CHECK-SPV-IR: %[[#FP32Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z27__spirv_ConvertFToBF16INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#FP32Matrix]]) +; CHECK-SPV-IR: %[[#ShortMatrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 
0) +; CHECK-SPV-IR: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z27__spirv_ConvertBF16ToFINTELPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#ShortMatrix]]) target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -48,25 +48,25 @@ target triple = "spir64-unknown-unknown" define void @convert_f_to_bf() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } define void @convert_bf_to_f() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) ret void } -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 
12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z27__spirv_ConvertFToBF16INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z27__spirv_ConvertBF16ToFINTEL(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) !llvm.module.flags = !{!0, !1, !2, !3, !4} !llvm.ident = !{!5} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll index f85a5f0cc8..b0f97b74d7 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_apply.ll @@ -18,9 +18,9 @@ ; CHECK-SPIRV: CooperativeMatrixApplyFunctionINTEL [[#MatTy]] [[#Apply:]] [[#Ptr]] [[#Mat]] ; CHECK-SPIRV: CooperativeMatrixStoreKHR [[#]] [[#Apply]] -; CHECK-LLVM: %[[Mat:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @"_Z26__spirv_CompositeConstructP38class.sycl::_V1::ext::oneapi::bfloat16" -; CHECK-LLVM: %[[Apply:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) 
@"_Z43__spirv_CooperativeMatrixApplyFunctionINTELPU3AS477class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapperPU3AS144__spirv_CooperativeMatrixKHR__short_8_16_0_0"(ptr addrspace(4) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) %[[Mat]]) -; CHECK-LLVM: call spir_func void @"_Z33__spirv_CooperativeMatrixStoreKHRPU3AS138class.sycl::_V1::ext::oneapi::bfloat16PU3AS144__spirv_CooperativeMatrixKHR__short_8_16_0_0liii"(ptr addrspace(1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) %[[Apply]], i64 32, i32 0, i32 3, i32 0) +; CHECK-LLVM: %[[Mat:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @"_Z26__spirv_CompositeConstructP38class.sycl::_V1::ext::oneapi::bfloat16" +; CHECK-LLVM: %[[Apply:[%0-9a-z.]+]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @"_Z43__spirv_CooperativeMatrixApplyFunctionINTELPU3AS477class.sycl::_V1::ext::oneapi::experimental::matrix::helper::reference_wrapperPU3AS144__spirv_CooperativeMatrixKHR__short_3_8_16_0"(ptr addrspace(4) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %[[Mat]]) +; CHECK-LLVM: call spir_func void @"_Z33__spirv_CooperativeMatrixStoreKHRPU3AS138class.sycl::_V1::ext::oneapi::bfloat16PU3AS144__spirv_CooperativeMatrixKHR__short_3_8_16_0il"(ptr addrspace(1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %[[Apply]], i32 0, i64 0) ; ModuleID = 'matrix_apply.bc' source_filename = "../llvm/sycl/test-e2e/Matrix/joint_matrix_apply_bf16.cpp" @@ -93,14 +93,14 @@ entry: %call.i.i = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) %ref.tmp6.ascast.i) call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %agg.tmp.i17) store i16 %call.i.i, ptr %agg.tmp.i17, align 2 - %call.i18 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z26__spirv_CompositeConstruct(ptr noundef 
nonnull byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2 %agg.tmp.i17) + %call.i18 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z26__spirv_CompositeConstruct(ptr noundef nonnull byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2 %agg.tmp.i17) call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %agg.tmp.i17) call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %ref.tmp6.i) %lambda.i = getelementptr inbounds %class.anon.0, ptr addrspace(4) %__SYCLKernel.ascast, i64 0, i32 1 %ref.tmp.ascast.i21 = addrspacecast ptr %ref.tmp.i20 to ptr addrspace(4) call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp.i20) store ptr addrspace(4) %lambda.i, ptr %ref.tmp.i20, align 8 - %call.i22 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef %call.i18) + %call.i22 = call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8) %ref.tmp.ascast.i21, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %call.i18) call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp.i20) %6 = load ptr addrspace(1), ptr %0, align 8 %7 = load i64, ptr %__SYCLKernel, align 8 @@ -114,7 +114,7 @@ entry: %add.ptr.i43 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i.i, i64 %mul12.i %div14.i = and i64 %sub5.i, -16 %add.ptr.i44 = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i43, i64 %div14.i - call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef %add.ptr.i44, target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef %call.i22, 
i64 noundef 32, i32 noundef 0, i32 noundef 3, i32 noundef 0) + call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef %add.ptr.i44, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %call.i22, i32 noundef 0, i64 noundef 0) call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %__SYCLKernel) ret void } @@ -126,16 +126,16 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z26__spirv_CompositeConstruct(ptr noundef byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2) local_unnamed_addr +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z26__spirv_CompositeConstruct(ptr noundef byval(%"class.sycl::_V1::ext::oneapi::bfloat16") align 2) local_unnamed_addr ; Function Attrs: convergent nounwind declare dso_local spir_func zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4)) local_unnamed_addr ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8), target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef) local_unnamed_addr +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z43__spirv_CooperativeMatrixApplyFunctionINTEL(ptr addrspace(4) noundef align 8 dereferenceable(8), target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef) local_unnamed_addr ; Function Attrs: convergent nounwind -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef, 
target("spirv.CooperativeMatrixKHR", i16, 8, 16, 0, 0) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef, i32 noundef, i64 noundef) local_unnamed_addr !llvm.module.flags = !{!0, !1} !opencl.spir.version = !{!2} diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll index a6f24bc596..ff6321bbe9 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll @@ -20,9 +20,9 @@ ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const3]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const1]] ; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixLoadKHR [[#MatTy2]] [[#Load1:]] ; CHECK-SPIRV: CooperativeMatrixLengthKHR [[#Int32Ty]] [[#]] [[#MatTy2]] @@ -32,13 +32,13 @@ ; CHECK-SPIRV: CooperativeMatrixStoreKHR -; CHECK-LLVM: 
call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) ; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 1, i32 1, i32 0, i64 %_arg_K) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS4clii(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i64 %_arg_K, i32 0, i32 1) -; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS4cl -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3i(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) -; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS4clii(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i64 %_arg_K, i32 0, i32 1) +; 
CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cl +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2i(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) +; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) ; ModuleID = 'test-matrix-opaque.bc' source_filename = "matrix-int8-test.cpp" @@ -57,8 +57,8 @@ $_ZTSZZ15matrix_multiply = comdat any ; Function Attrs: convergent norecurse define weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiply(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 + %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 
2), align 8 %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 %agg.tmp16.sroa.0.sroa.0.0.copyload = load i64, ptr %_arg_accB6, align 8 @@ -80,8 +80,8 @@ entry: %cmp.i58.i = icmp ult i64 %5, 2147483648 %sub5.i = sub nsw i64 %2, %5 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i) - %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 %mul.i = mul nsw i64 %sub.i, 12 %div2452.i = lshr i64 %sub5.i, 4 %mul26.i = mul i64 %div2452.i, 48 @@ -106,19 +106,19 @@ for.body.i: ; preds = %for.cond.i %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 1, i32 noundef 0, i64 noundef %_arg_K) #4 - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 - %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %call1.i.i) + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 
0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 + %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %call1.i.i) %div20.i = mul nsw i32 %k.0.i, 12 %conv21.i = zext i32 %div20.i to i64 %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 1, i32 noundef 0, i64 noundef %mul22.i) #4 - %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 + %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) 
%call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %ref.tmp29.sroa.0.i, align 8 store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %sub_c.sroa.0.i, align 8 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) @@ -131,31 +131,31 @@ _ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6 %mul39.i = mul nuw i64 %div2452.i, 12 %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 + 
%sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i) ret void } ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 -declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef) +declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef) ; Function Attrs: convergent declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef 
target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll index 6392c94138..8d0dcbfde4 100644 --- 
a/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/tf32_conversion_instructions.ll @@ -24,8 +24,8 @@ ; CHECK-SPIRV: CompositeConstruct [[#FP32MatTy]] [[#FP32Mat:]] ; CHECK-SPIRV: RoundFToTF32INTEL [[#FP32MatTy]] [[#]] [[#FP32Mat]] -; CHECK-LLVM: %[[#Mat:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z25__spirv_RoundFToTF32INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Mat]]) +; CHECK-LLVM: %[[#Mat:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTELPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Mat]]) target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -33,14 +33,14 @@ target triple = "spir64-unknown-unknown" define void @convert_f_to_tf() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } -declare spir_func noundef 
target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z25__spirv_RoundFToTF32INTEL(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) !llvm.module.flags = !{!0, !1, !2, !3, !4} !llvm.ident = !{!5} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll index 87e4fc17dd..40b8749e11 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/arithmetic_instructions.ll @@ -20,79 +20,79 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: SNegate [[#MatrixTypeInt]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegatePU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegatePU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1) define spir_kernel void @testSNegate(i32 %a) #0 
!kernel_arg_addr_space !10 !kernel_arg_access_qual !11 !kernel_arg_type !12 !kernel_arg_type_qual !9 !kernel_arg_base_type !12 { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: FNegate [[#MatrixTypeFloat]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegatePU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) +; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegatePU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) define spir_kernel void @testFNeg(float %a) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !9 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call 
spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: IAdd [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %2 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAddPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %2 = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAddPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testIAdd(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call 
spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: ISub [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISubPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISubPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testISub(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: IMul [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMulPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMulPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define spir_kernel void @testIMul(i32 %a, i32 %b) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 { - %1 = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: SDiv [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define void @testSDiv(i32 %a, i32 %b) { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: UDiv [[#MatrixTypeInt]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_UDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDivPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) define void @testUDiv(i32 %a, i32 %b) { - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 
3, 12, 12, 3) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %2) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %2 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %1, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %2) ret void } @@ -100,69 +100,69 @@ define void @testUDiv(i32 %a, i32 %b) { ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FAdd [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FAddPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %0 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %1 = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) 
@_Z12__spirv_FAddPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFAdd(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FSub [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSubPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; 
CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSubPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFSub(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FMul [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z12__spirv_FMulPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMulPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFMul(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixA:]] [[#]] {{$}} ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixB:]] [[#]] {{$}} ; CHECK-SPIRV: FDiv [[#MatrixTypeFloat]] [[#]] [[#MatrixA]] [[#MatrixB]] -; 
CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDivPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) +; CHECK-LLVM: %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDivPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2S1_(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) define spir_kernel void @testFDiv(float %a, float %b) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %1) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %1 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %1) ret void } -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) -declare 
spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z15__spirv_FNegate(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z15__spirv_SNegate(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) 
noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IAdd(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_ISub(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_IMul(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_SDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z12__spirv_UDiv(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FAdd(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FSub(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FMul(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z12__spirv_FDiv(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) attributes #0 = { nounwind } diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll index 64265ab19c..28979b4eb1 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/array_of_matrices.ll @@ -27,9 +27,9 @@ ; CHECK-SPIRV: TypeArray [[#ArrayTy3:]] [[#StructTy3]] [[#]] ; CHECK-SPIRV: TypeArray [[#]] [[#ArrayTy3]] [[#]] -; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) } -; CHECK-LLVM: 
%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) } -; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) } +; CHECK-LLVM: %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) } ; CHECK-LLVM: alloca [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]] ; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]] ; CHECK-LLVM: alloca [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]] @@ -42,9 +42,9 @@ target triple = "spir64-unknown-unknown" %"class.sycl::_V1::__generated_multi_ptr" = type { ptr addrspace(1) } %"class.sycl::_V1::__generated_multi_ptr.0" = type { ptr addrspace(1) } %"class.sycl::_V1::__generated_multi_ptr.1" = type { ptr addrspace(1) } -%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) } -%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) } -%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix" = type { target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5" = type { 
target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) } +%"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6" = type { target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) } %"class.sycl::_V1::ext::oneapi::bfloat16" = type { i16 } $_ZTSZZ12joint_matmulILj256ELj256ELj256ELj256ELj2EN4sycl3_V13ext6oneapi8bfloat16EfLj16EEdPT4_S6_PT5_RNS1_5queueEiENKUlRNS1_7handlerEE_clESC_EUlNS1_7nd_itemILi2EEEE_ = comdat any @@ -117,8 +117,8 @@ for.cond.cleanup7.i: ; preds = %for.cond5.i for.body8.i: ; preds = %for.cond5.i %conv.i = zext i32 %n.0.i to i64 %arrayidx10.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom.i, i64 %conv.i - %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 - store target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef 0.000000e+00) #5 + store target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) %call.i.i, ptr %arrayidx10.i, align 8, !tbaa !82 %inc.i = add nuw nsw i32 %n.0.i, 1 br label %for.cond5.i, !llvm.loop !84 @@ -196,8 +196,8 @@ for.body42.i: ; preds = %for.cond39.i %add55.i = add nuw nsw i64 %add52.i, %conv54.i %mul56.i = shl nuw nsw i64 %add55.i, 8 %gep = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %invariant.gep, i64 %mul56.i - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 
0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 - store target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) %call1.i.i, ptr %arrayidx47.i, align 8, !tbaa !86 + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %gep, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) %call1.i.i, ptr %arrayidx47.i, align 8, !tbaa !86 %inc60.i = add nuw nsw i32 %m38.0.i, 1 br label %for.cond39.i, !llvm.loop !88 @@ -212,8 +212,8 @@ for.body67.i: ; preds = %for.cond63.i %14 = shl nuw nsw i64 %conv64.i, 5 %mul85.i = add nuw nsw i64 %14, %11 %add.ptr.i226.i = getelementptr inbounds %"class.sycl::_V1::ext::oneapi::bfloat16", ptr addrspace(1) %add.ptr.i225.i, i64 %mul85.i - %call1.i219.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 - store target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 + %call1.i219.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) 
@_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef %add.ptr.i226.i, i64 noundef 512, i32 noundef 2, i32 noundef 3, i32 noundef 0) #5 + store target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) %call1.i219.i, ptr %arrayidx72.i, align 8, !tbaa !89 %inc87.i = add nuw nsw i32 %n62.0.i, 1 br label %for.cond63.i, !llvm.loop !91 @@ -225,7 +225,7 @@ for.cond90.i: ; preds = %for.cond63.i, %for. for.cond95.preheader.i: ; preds = %for.cond90.i %idxprom102.i = zext i32 %m89.0.i to i64 %arrayidx105.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.5"]], ptr %tA.i, i64 0, i64 %idxprom102.i, i64 %idxprom46.i - %15 = load target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 + %15 = load target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0), ptr %arrayidx105.i, align 8, !tbaa !86, !noalias !92 br label %for.cond95.i for.cond.cleanup92.i: ; preds = %for.cond90.i @@ -245,10 +245,10 @@ for.body99.i: ; preds = %for.cond95.i %conv96.i = zext i32 %n94.0.i to i64 %arrayidx109.i = getelementptr inbounds [4 x [2 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix.6"]], ptr %tB.i, i64 0, i64 %conv96.i, i64 %idxprom46.i %arrayidx113.i = getelementptr inbounds [4 x [4 x %"struct.sycl::_V1::ext::oneapi::experimental::matrix::joint_matrix"]], ptr %tC.i, i64 0, i64 %idxprom102.i, i64 %conv96.i - %16 = load target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 - %17 = load target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 - %call.i221.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) 
@_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) noundef %15, target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) noundef %16, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef %17, i32 noundef 3) #5, !noalias !92 - store target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 + %16 = load target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1), ptr %arrayidx109.i, align 8, !tbaa !89, !noalias !92 + %17 = load target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2), ptr %arrayidx113.i, align 8, !tbaa !82, !noalias !92 + %call.i221.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef %15, target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) noundef %16, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef %17, i32 noundef 3) #5, !noalias !92 + store target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) %call.i221.i, ptr %arrayidx113.i, align 8, !tbaa !82 %inc120.i = add nuw nsw i32 %n94.0.i, 1 br label %for.cond95.i, !llvm.loop !97 @@ -281,8 +281,8 @@ for.body141.i: ; preds = %for.cond137.i %mul160.i = shl nuw nsw i64 %conv138.i, 4 %add161.i = add nuw nsw i64 %add158.i, %mul160.i %add.ptr.i228.i = getelementptr inbounds float, ptr addrspace(1) 
%add.ptr.i227.i, i64 %add161.i - %18 = load target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2), ptr %arrayidx146.i, align 8, !tbaa !82 - tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 + %18 = load target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2), ptr %arrayidx146.i, align 8, !tbaa !82 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef %add.ptr.i228.i, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef %18, i64 noundef 256, i32 noundef 0, i32 noundef 3, i32 noundef 0) #5 %inc163.i = add nuw nsw i32 %n136.0.i, 1 br label %for.cond137.i, !llvm.loop !99 @@ -302,19 +302,19 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 declare void @llvm.assume(i1 noundef) #2 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z26__spirv_CompositeConstructIffLm8ELm16ELN5__spv9MatrixUseE2ELNS0_12MatrixLayoutE3ELNS0_5Scope4FlagE3EEPNS0_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEET_(float noundef) local_unnamed_addr #3 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef 
target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm8ELm16ELN5__spv9MatrixUseE0ELNS6_12MatrixLayoutE0ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) @_Z32__spirv_CooperativeMatrixLoadKHRIU3AS1N4sycl3_V13ext6oneapi8bfloat16ES4_Lm16ELm16ELN5__spv9MatrixUseE1ELNS6_12MatrixLayoutE2ELNS6_5Scope4FlagE3EEPNS6_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEPT_mS8_SA_i(ptr addrspace(1) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 ; Function Attrs: convergent nounwind -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) 
@_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 8, 16, 3, 0) noundef, target("spirv.CooperativeMatrixKHR", i16, 16, 16, 3, 1) noundef, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef, i32 noundef) local_unnamed_addr #3 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) @_Z31__spirv_CooperativeMatrixMadKHRIN4sycl3_V13ext6oneapi8bfloat16EfLm8ELm16ELm16ELN5__spv9MatrixUseE0ELS6_1ELS6_2ELNS5_12MatrixLayoutE0ELS7_2ELS7_3ELNS5_5Scope4FlagE3EEPNS5_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT3_EXT9_EXT10_EXT6_EEEPNSA_IT_XT1_EXT2_EXT7_EXT10_EXT4_EEEPNSA_ISE_XT2_EXT3_EXT8_EXT10_EXT5_EEESD_S9_(target("spirv.CooperativeMatrixKHR", i16, 3, 8, 16, 0) noundef, target("spirv.CooperativeMatrixKHR", i16, 3, 16, 16, 1) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef, i32 noundef) local_unnamed_addr #3 ; Function Attrs: convergent nounwind -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", float, 8, 16, 3, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr #3 +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRIU3AS1ffLm8ELm16ELN5__spv9MatrixUseE2ELNS1_12MatrixLayoutE3ELNS1_5Scope4FlagE3EEvPT_PNS1_24__spirv_CooperativeMatrixKHRIT0_XT1_EXT2_EXT4_EXT5_EXT3_EEEmS3_S5_i(ptr addrspace(1) noundef, target("spirv.CooperativeMatrixKHR", float, 3, 8, 16, 2) noundef, i64 noundef, i32 noundef, i32 noundef, i32 noundef) 
local_unnamed_addr #3 declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll index bfc710a741..39bc1763e9 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/conversion_instructions.ll @@ -26,117 +26,117 @@ target triple = "spir64-unknown-unknown" ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: ConvertFToU [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Matrix]]) define void @convert_f_to_u() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) 
@_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: ConvertFToS [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructf(float 0.000000e+00) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %[[#Matrix]]) define void @convert_f_to_s() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) 
@_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: ConvertSToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#Matrix]]) define void @convert_s_to_f() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) 
@_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt16]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: ConvertUToF [[#MatrixTypeFloat16]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructs(i16 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructs(i16 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %[[#Matrix]]) define void @convert_u_to_f() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) 
@_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt32]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: UConvert [[#MatrixTypeInt8]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %[[#Matrix]]) define void @u_convert() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) 
@_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeInt8]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: SConvert [[#MatrixTypeInt32]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructc(i8 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructc(i8 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) %[[#Matrix]]) define void @s_convert() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt8(i8 0) - %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) 
@_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt8(i8 0) + %call = call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) %0) ret void } ; CHECK-SPIRV: CompositeConstruct [[#MatrixTypeFloat16]] [[#MatrixIn:]] [[#]] {{$}} ; CHECK-SPIRV: FConvert [[#MatrixTypeFloat]] [[#]] [[#MatrixIn]] -; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructDh(half 0xH0000) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) %[[#Matrix]]) +; CHECK-LLVM: %[[#Matrix:]] = call spir_func target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructDh(half 0xH0000) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) %[[#Matrix]]) define void @f_convert() { entry: - %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructHalf(half 0xH0000) - %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) 
@_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) %0) + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructHalf(half 0xH0000) + %call = call spir_func target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) %0) ret void } -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructHalf(half noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructHalf(half noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt32(i32 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt32(i32 noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt16(i16 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt16(i16 noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructInt8(i8 noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructInt8(i8 noundef) -declare spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToU_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z77__spirv_ConvertFToS_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2(target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertSToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_3(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 3) noundef) +declare spir_func noundef 
target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) @_Z77__spirv_ConvertUToF_RPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2_rtpPU3AS145__spirv_CooperativeMatrixKHR__short_3_12_12_2(target("spirv.CooperativeMatrixKHR", i16, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) @_Z74__spirv_UConvert_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2(target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_3(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z74__spirv_SConvert_RPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_12_2(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 12, 2) noundef) -declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_3_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_3(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 3) noundef) +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 2) @_Z75__spirv_FConvert_RPU3AS145__spirv_CooperativeMatrixKHR__float_3_12_12_2_satPU3AS144__spirv_CooperativeMatrixKHR__half_3_12_12_2(target("spirv.CooperativeMatrixKHR", half, 3, 12, 12, 2) noundef) !llvm.module.flags = !{!0, !1, !2, !3, !4} !llvm.ident = 
!{!5} diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll index 9c7e9d9f91..18897f6203 100644 --- a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix.ll @@ -18,9 +18,9 @@ ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const3:]] 3 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const3]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const3]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const2]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const1]] ; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixLoadKHR [[#MatTy2]] ; CHECK-SPIRV: CooperativeMatrixLengthKHR [[#Int32Ty]] [[#]] [[#MatTy2]] @@ -29,12 +29,13 @@ ; CHECK-SPIRV: CooperativeMatrixStoreKHR -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3PU3AS4clii -; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_3(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) -; CHECK-LLVM: call 
spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_3PU3AS4cl -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR{{.*}}(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) -; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR{{.*}}(ptr addrspace(4) %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS4clii +; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cl +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR{{.*}}(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) +; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR{{.*}}(ptr addrspace(4) %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) + ; ModuleID = 'test-matrix-opaque.bc' source_filename = "matrix-int8-test.cpp" @@ -53,8 +54,8 @@ $_ZTSZZ15matrix_multiply = comdat any ; Function Attrs: convergent norecurse define 
weak_odr dso_local spir_kernel void @_ZTSZZ15matrix_multiply(ptr addrspace(1) noundef align 1 %_arg_accA, ptr addrspace(1) noundef align 1 %_arg_accB, ptr noundef byval(%"class.sycl::_V1::range") align 8 %_arg_accB5, ptr noundef byval(%"class.sycl::_V1::id") align 8 %_arg_accB6, ptr addrspace(1) noundef align 4 %_arg_accC, i64 noundef %_arg_N, i64 noundef %_arg_K) local_unnamed_addr #0 comdat { entry: - %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 - %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), align 8 + %sub_c.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 + %ref.tmp29.sroa.0.i = alloca target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), align 8 %agg.tmp15.sroa.0.sroa.2.0..sroa_idx = getelementptr inbounds %"class.sycl::_V1::range", ptr %_arg_accB5, i64 0, i32 0, i32 0, i64 1 %agg.tmp15.sroa.0.sroa.2.0.copyload = load i64, ptr %agg.tmp15.sroa.0.sroa.2.0..sroa_idx, align 8 %0 = getelementptr inbounds %"class.sycl::_V1::id", ptr %_arg_accB6, i64 0, i32 0, i32 0, i64 0 @@ -78,8 +79,8 @@ entry: %sub5.i = sub nsw i64 %3, %6 %sub_c.sroa.0.i.0.i.0..sroa_cast = bitcast ptr %sub_c.sroa.0.i to ptr call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) - %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i.i, ptr %sub_c.sroa.0.i, align 8 + %call.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef 0) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i.i, ptr %sub_c.sroa.0.i, align 8 %mul.i = mul nsw i64 %sub.i, 12 %div2452.i = lshr i64 %sub5.i, 4 %mul26.i = mul i64 %div2452.i, 48 @@ -106,18 +107,18 @@ for.body.i: ; preds = %for.cond.i %conv13.i = zext i32 
%mul12.i to i64 %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 - %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) %call1.i.i) + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 + %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %call1.i.i) %div20.i = mul nsw i32 %k.0.i, 12 %conv21.i = zext i32 %div20.i to i64 %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 + %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) - %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef 
target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 - store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 + store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %7, align 8 store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %8, align 8 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i.0.i.0..sroa_cast) @@ -130,28 +131,28 @@ _ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6 %mul39.i = mul nuw i64 %div2452.i, 12 %add.ptr.i81.i = getelementptr inbounds i32, ptr addrspace(1) %add.ptr.i.i, i64 %mul39.i %call.ascast.i.i = addrspacecast ptr addrspace(1) %add.ptr.i81.i to ptr addrspace(4) - 
%sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3), ptr %sub_c.sroa.0.i, align 8 - tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 + %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 + tail call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef %call.ascast.i.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0..i, i32 noundef 0, i64 noundef %_arg_N, i32 noundef 1) #4 call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %sub_c.sroa.0.i.0.i.0..sroa_cast) ret void } ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 -declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef) +declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef) ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef 
target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 3) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 3) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 3) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 diff --git 
a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll new file mode 100644 index 0000000000..ef25389724 --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_scope.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv 2>&1 | FileCheck %s + +; CHECK: InvalidInstruction: Can't translate llvm instruction: +; CHECK: TypeCooperativeMatrixKHR +; CHECK: Unsupported Scope parameter + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 8, 12, 12, 2) @_Z26__spirv_CompositeConstructFloat(float noundef) diff --git a/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll new file mode 100644 index 0000000000..0016f888cb --- /dev/null +++ b/test/extensions/KHR/SPV_KHR_cooperative_matrix/cooperative_matrix_wrong_use.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s -o %t.bc +; RUN: not llvm-spirv %t.bc --spirv-ext=+SPV_KHR_cooperative_matrix -o %t.spv 2>&1 | FileCheck %s + +; CHECK: InvalidInstruction: Can't translate llvm instruction: +; CHECK: TypeCooperativeMatrixKHR +; CHECK: Incorrect Use parameter, should be MatrixA, MatrixB or Accumulator + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +define void @convert_f_to_u() { +entry: + %0 = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 
12, 3) @_Z26__spirv_CompositeConstructFloat(float 0.000000e+00) + ret void +} + +declare spir_func noundef target("spirv.CooperativeMatrixKHR", float, 3, 12, 12, 3) @_Z26__spirv_CompositeConstructFloat(float noundef) From 9d8262da82a33d5ed1adb90946c599e9c90fdc7f Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy <89994100+VyacheslavLevytskyy@users.noreply.github.com> Date: Mon, 27 Nov 2023 06:21:58 -0800 Subject: [PATCH 11/12] Fix cooperative matrix prefetch test (scope parameter) (#2234) - change Scope argument to one of two available options: ScopeWorkgroup/ScopeSubgroup - fix arguments order in calls to OpCooperativeMatrixLoadKHR() (cherry picked from commit 4e1d3e0192de56b222567adbaee73983e8a4b108) --- .../cooperative_matrix_prefetch.ll | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll index ff6321bbe9..620f217d3d 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll @@ -21,7 +21,7 @@ ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const2:]] 2 ; CHECK-SPIRV-DAG: Constant [[#Int32Ty]] [[#Const1:]] 1 ; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy1:]] [[#Int32Ty]] [[#Const3]] [[#Const12]] [[#Const12]] [[#Const2]] -; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const0]] [[#Const12]] [[#Const48]] [[#Const0]] +; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy2:]] [[#Int8Ty]] [[#Const3]] [[#Const12]] [[#Const48]] [[#Const0]] ; CHECK-SPIRV-DAG: TypeCooperativeMatrixKHR [[#MatTy3:]] [[#Int8Ty]] [[#Const2]] [[#Const48]] [[#Const12]] [[#Const1]] ; CHECK-SPIRV: CompositeConstruct [[#MatTy1]] ; CHECK-SPIRV: CooperativeMatrixLoadKHR [[#MatTy2]] [[#Load1:]] @@ -31,13 +31,13 @@ ; CHECK-SPIRV: CooperativeMatrixMulAddKHR [[#MatTy1]] ;
CHECK-SPIRV: CooperativeMatrixStoreKHR - ; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 1, i32 1, i32 0, i64 %_arg_K) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS4clii(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i64 %_arg_K, i32 0, i32 1) -; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cl -; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_0_12_48_0PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2i(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 12, i32 48, i32 0, i32 0, i64 %_arg_K) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS4cili(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i64 %_arg_K, i32 1) +; CHECK-LLVM: call spir_func i32 
@_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 12, i32 48, i32 0, i32 0, i64 %mul22.i) +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cil +; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2i(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) ; CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) ; ModuleID = 'test-matrix-opaque.bc' @@ -105,19 +105,19 @@ for.body.i: ; preds = %for.cond.i %conv13.i = zext i32 %mul12.i to i64 %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 1, i32 noundef 0, i64 noundef %_arg_K) #4 - %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i64 noundef %_arg_K, i32 noundef 0, i32 noundef 1) #4 - %len = tail call 
spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) %call1.i.i) + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i32 noundef 0, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %_arg_K) + %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i64 noundef %_arg_K, i32 noundef 1) #4 + %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %call1.i.i) %div20.i = mul nsw i32 %k.0.i, 12 %conv21.i = zext i32 %div20.i to i64 %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i32 noundef 0, i32 noundef 1, i32 noundef 1, i32 noundef 0, i64 noundef %mul22.i) #4 - %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i64 noundef %mul22.i) #4 + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i32 noundef 0, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %mul22.i) + %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) 
%sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 - %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 + %call.i77.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef %call1.i.i, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef %call1.i73.i, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i, i32 noundef 12) #4 store target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) %call.i77.i, ptr %ref.tmp29.sroa.0.i, align 8 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i = load i64, ptr %ref.tmp29.sroa.0.i, align 8 store i64 %ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.i.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0.ref.tmp29.sroa.0.0..i, ptr %sub_c.sroa.0.i, align 8 @@ -140,19 +140,19 @@ _ZZZ15matrix_multiplyIiaLm24ELm96ELm24ELm96ELm24ELm24EEvR10big_matrixIT_XT5_EXT6 ; Function Attrs: convergent declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstruct(i32 noundef) local_unnamed_addr #2 -declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef) +declare dso_local spir_func noundef i32 
@_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef) -; Function Attrs: convergent -declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 +; Function Attrs: convergent nounwind +declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i64 noundef, i32 noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent -declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 0, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 +declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) 
@_Z34__spirv_CooperativeMatrixMulAddKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef) local_unnamed_addr #2 ; Function Attrs: convergent declare dso_local spir_func void @_Z33__spirv_CooperativeMatrixStoreKHR(ptr addrspace(4) noundef, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2 From 8a3917e6fc9a80c846af0084e4c7d8ef75259b32 Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Thu, 16 May 2024 18:02:14 +0200 Subject: [PATCH 12/12] Revert "Revert "Remove CoordX and CoordY arguments of OpCooperativeMatrixPrefetchINTEL" (#2293)" (#2560) This reverts commit 93f2783317007343da7e8b00be0a363f2171d77f. (cherry picked from commit 9b00b8891d685379df64296093ac638e6ab10b85) --- lib/SPIRV/libSPIRV/SPIRVInstruction.h | 2 +- .../cooperative_matrix_prefetch.ll | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 5c3d0daadf..82c7f33f19 100644 --- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -3554,7 +3554,7 @@ class SPIRVCooperativeMatrixPrefetchINTELInstBase typedef SPIRVInstTemplate \ SPIRV##x##INTEL; -_SPIRV_OP(CooperativeMatrixPrefetch, false, 8, true, 5) +_SPIRV_OP(CooperativeMatrixPrefetch, false, 6, true, 3) #undef _SPIRV_OP class SPIRVCooperativeMatrixInvocationInstructionsINTELInstBase diff --git a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll index 620f217d3d..03dfbdfeb8 100644 --- a/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll +++ b/test/extensions/INTEL/SPV_INTEL_joint_matrix/cooperative_matrix_prefetch.ll @@ -32,10 +32,10 @@ ; CHECK-SPIRV: 
CooperativeMatrixStoreKHR ; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z26__spirv_CompositeConstructi(i32 0) -; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 12, i32 48, i32 0, i32 0, i64 %_arg_K) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 12, i32 48, i32 0, i32 0, i64 %_arg_K) ; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS4cili(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i64 %_arg_K, i32 1) ; CHECK-LLVM: call spir_func i32 @_Z34__spirv_CooperativeMatrixLengthKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) -; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 0, i32 0, i32 12, i32 48, i32 0, i32 0, i64 %mul22.i) +; CHECK-LLVM: call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTELPU3AS4ciiiil(ptr addrspace(4) %[[MatrixPtr:[%0-9a-z.]+]], i32 12, i32 48, i32 0, i32 0, i64 %mul22.i) ; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z86__spirv_CooperativeMatrixLoadKHR_RPU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS4cil ; CHECK-LLVM: call spir_func target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @_Z34__spirv_CooperativeMatrixMulAddKHRPU3AS144__spirv_CooperativeMatrixKHR__char_3_12_48_0PU3AS144__spirv_CooperativeMatrixKHR__char_2_48_12_1PU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2i(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %{{.*}}, target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) ; 
CHECK-LLVM: call spir_func void @_Z33__spirv_CooperativeMatrixStoreKHRPU3AS4iPU3AS144__spirv_CooperativeMatrixKHR__uint_3_12_12_2ili(ptr addrspace(4) %{{.*}}, target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2) @@ -105,7 +105,7 @@ for.body.i: ; preds = %for.cond.i %conv13.i = zext i32 %mul12.i to i64 %add.ptr.i96.i = getelementptr inbounds i8, ptr addrspace(1) %add.ptr.i93.i, i64 %conv13.i %call.ascast.i66.i = addrspacecast ptr addrspace(1) %add.ptr.i96.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i32 noundef 0, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %_arg_K) + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %_arg_K) %call1.i.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef %call.ascast.i66.i, i32 noundef 0, i64 noundef %_arg_K, i32 noundef 1) #4 %len = tail call spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) %call1.i.i) %div20.i = mul nsw i32 %k.0.i, 12 @@ -113,7 +113,7 @@ for.body.i: ; preds = %for.cond.i %mul23.i = mul i64 %mul22.i, %conv21.i %add.ptr.i111.i = getelementptr i8, ptr addrspace(1) %add.ptr.i108140.i, i64 %mul23.i %call.ascast.i72.i = addrspacecast ptr addrspace(1) %add.ptr.i111.i to ptr addrspace(4) - tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i32 noundef 0, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 0, i64 noundef %mul22.i) + tail call spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 12, i32 noundef 48, i32 noundef 0, i32 noundef 
0, i64 noundef %mul22.i) %call1.i73.i = tail call spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 2, 48, 12, 1) @_Z32__spirv_CooperativeMatrixLoadKHR_2(ptr addrspace(4) noundef %call.ascast.i72.i, i32 noundef 0, i64 noundef %mul22.i) #4 call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ref.tmp29.sroa.0.i) %sub_c.sroa.0.i.0.sub_c.sroa.0.i.0.sub_c.sroa.0.0.sub_c.sroa.0.0.sub_c.sroa.0.0.125.i = load target("spirv.CooperativeMatrixKHR", i32, 3, 12, 12, 2), ptr %sub_c.sroa.0.i, align 8 @@ -143,7 +143,7 @@ declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i32, 3, declare dso_local spir_func noundef i32 @_Z34__spirv_CooperativeMatrixLengthKHR(target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) noundef) ; Function Attrs: convergent nounwind -declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 +declare dso_local spir_func void @_Z38__spirv_CooperativeMatrixPrefetchINTEL(ptr addrspace(4) noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i64 noundef) local_unnamed_addr #2 ; Function Attrs: convergent declare dso_local spir_func noundef target("spirv.CooperativeMatrixKHR", i8, 3, 12, 48, 0) @_Z32__spirv_CooperativeMatrixLoadKHR_1(ptr addrspace(4) noundef, i32 noundef, i64 noundef, i32 noundef) local_unnamed_addr #2