From e139505326031585a5ccbee04765d2276d48354e Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 3 Nov 2022 10:09:17 -0400 Subject: [PATCH 01/21] WIP --- .../device-requirements.ll | 4 +- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 53 +++++++++++++++++++ llvm/tools/sycl-post-link/ModuleSplitter.h | 3 ++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 18 +++++-- 4 files changed, 73 insertions(+), 5 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll b/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll index aff8c8939b4e5..891033091a5cc 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll @@ -40,7 +40,7 @@ $_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_ = comdat any $_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_ = comdat any ; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !43 { +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !43 !sycl_used_aspects !44 { entry: %__SYCLKernel = alloca %class.anon, align 1 %__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)* @@ -66,7 +66,7 @@ entry: } ; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_() #0 comdat !kernel_arg_buffer_location !43 { +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_() #0 comdat !kernel_arg_buffer_location !43 !sycl_used_aspects !45 { entry: %__SYCLKernel = alloca %class.anon, align 1 %__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)* diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index a4ed14841a196..3b61960c4ae8d 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -761,5 +761,58 @@ getDoubleGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { return std::make_unique(std::move(MD), std::move(Groups)); } +std::unique_ptr +getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { + EntryPointGroupVec Groups; + std::map AspectsToFunctions; + + Module &M = MD.getModule(); + + // Only process module entry points: + for (auto &F : M.functions()) { + if (!isEntryPoint(F, EmitOnlyKernelsAsEntryPoints) || + !MD.isEntryPointCandidate(F)) { + continue; + } + + if (!F.hasMetadata("sycl_used_aspects")) { + AspectsToFunctions["no-aspects"].insert(&F); + llvm::outs() << "Function " << F.getName() << " has no aspects\n"; + } else { + auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, + unsigned OpNo) -> APInt { + Constant *C = + cast(N->getOperand(OpNo).get())->getValue(); + return C->getUniqueInteger(); + }; + std::string Key = "aspects"; + const MDNode *MDN = F.getMetadata("sycl_used_aspects"); + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + auto Int = ExtractIntegerFromMDNodeOperand(MDN, I); + SmallVector Str; + Int.toStringUnsigned(Str); + Key += "-" + std::string(Str.data(), Str.size()); + } + llvm::outs() << "Function " << F.getName() << " has aspects. key: " << Key << "\n"; + AspectsToFunctions[Key].insert(&F); + } + } + + if (!AspectsToFunctions.empty()) { + Groups.reserve(AspectsToFunctions.size()); + for (auto &EPG : AspectsToFunctions) { + Groups.emplace_back(EntryPointGroup{ + EPG.first, std::move(EPG.second), MD.getEntryPointGroup().Props}); + } + } else { + // No entry points met, record this. + Groups.push_back({GLOBAL_SCOPE_NAME, {}}); + } + if (Groups.size() > 1) + return std::make_unique(std::move(MD), std::move(Groups)); + else + return std::make_unique(std::move(MD), std::move(Groups)); +} + } // namespace module_split } // namespace llvm diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index f362c2c1973da..4d6f8c2fc96b5 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -253,6 +253,9 @@ getSplitterByMode(ModuleDesc &&MD, IRSplitMode Mode, std::unique_ptr getDoubleGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); +std::unique_ptr +getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); + #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); void dumpEntryPoints(const Module &M, bool OnlyKernelsAreEntryPoints = false, diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 46cbdcceaae56..064dbacc43994 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -848,7 +848,18 @@ processInputModule(std::unique_ptr M) { DUMP_ENTRY_POINTS(MMs.back().entries(), MMs.back().Name.c_str(), 3); Modified = true; } - bool SplitOccurred = SplitByScope || SplitByDoubleGRF || SplitByESIMD; + + SmallVector Modules; + for (auto &MD : MMs) { + auto Splitter = module_split::getPerAspectsSplitter(std::move(MD), EmitOnlyKernelsAsEntryPoints); + while (Splitter->hasMoreSplits()) { + // FIXME: spec constants processing must occur at the lowest level + module_split::ModuleDesc ModuleDesc = Splitter->nextSplit(); + Modules.emplace_back(std::move(ModuleDesc)); + } + } + + bool SplitOccurred = SplitByScope || SplitByDoubleGRF || SplitByESIMD || Modules.size() != MMs.size(); if (IROutputOnly) { if (SplitOccurred) { @@ -868,12 +879,13 @@ processInputModule(std::unique_ptr M) { errs() << "sycl-post-link NOTE: no modifications to the input LLVM IR " "have been made\n"; } - for (module_split::ModuleDesc &IrMD : MMs) { + for (module_split::ModuleDesc &IrMD : Modules) { IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); addTableRow(*Table, T); +++ID; } } - ++ID; + //++ID; } return Table; } From 981c726b6ab69267535f119ddc48e7f24ce940e3 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 3 Nov 2022 11:38:07 -0400 Subject: [PATCH 02/21] Refactor sycl-post-link part --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 2 - llvm/tools/sycl-post-link/sycl-post-link.cpp | 49 +++++++++++++------- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 3b61960c4ae8d..f4b0e7ffbb060 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -777,7 +777,6 @@ getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { if (!F.hasMetadata("sycl_used_aspects")) { AspectsToFunctions["no-aspects"].insert(&F); - llvm::outs() << "Function " << F.getName() << " has no aspects\n"; } else { auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, unsigned OpNo) -> APInt { @@ -793,7 +792,6 @@ getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { Int.toStringUnsigned(Str); Key += "-" + std::string(Str.data(), Str.size()); } - llvm::outs() << "Function " << F.getName() << " has aspects. key: " << Key << "\n"; AspectsToFunctions[Key].insert(&F); } } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 064dbacc43994..3448f6a9db86a 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -756,9 +756,33 @@ processInputModule(std::unique_ptr M) { module_split::getSplitterByMode(module_split::ModuleDesc{std::move(M)}, SplitMode, IROutputOnly, EmitOnlyKernelsAsEntryPoints); + + SmallVector TopLevelModules; + bool SplitByProperties = false; + + while (ScopedSplitter->hasMoreSplits()) { + module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); + std::unique_ptr PropertiesSplitter = + module_split::getPerAspectsSplitter(std::move(MD), + EmitOnlyKernelsAsEntryPoints); + + // Here we perform second-level splitting based on device-specific + // features used/declared in entry points. + // This step is mandatory, because it is required for functional + // correctness, i.e. to prevent speculative compilation of kernels that use + // optional features on a HW which doesn't support them. + while (PropertiesSplitter->hasMoreSplits()) { + TopLevelModules.emplace_back(PropertiesSplitter->nextSplit()); + } + + SplitByProperties |= PropertiesSplitter->totalSplits() > 1; + } + const bool SplitByScope = ScopedSplitter->totalSplits() > 1; Modified |= SplitByScope; + Modified |= SplitByProperties; + // FIXME: this check should be performed on all split levels if (DeviceGlobals) ScopedSplitter->verifyNoCrossModuleDeviceGlobalUsage(); @@ -769,11 +793,12 @@ processInputModule(std::unique_ptr M) { // "leaf" ModuleDesc's resulted from splitting. Some bookkeeping is needed for // ESIMD splitter to link back needed modules. - // Proceed with top-level splitting. - while (ScopedSplitter->hasMoreSplits()) { - module_split::ModuleDesc MDesc = ScopedSplitter->nextSplit(); + // Based on results from a top-level splitting, we perform some lower-level + // splitting for various unique features. + for (module_split::ModuleDesc &MDesc : TopLevelModules) { DUMP_ENTRY_POINTS(MDesc.entries(), MDesc.Name.c_str(), 1); + // FIXME: double grf should be handled by properties splitter above std::unique_ptr DoubleGRFSplitter = module_split::getDoubleGRFSplitter(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); @@ -849,17 +874,8 @@ processInputModule(std::unique_ptr M) { Modified = true; } - SmallVector Modules; - for (auto &MD : MMs) { - auto Splitter = module_split::getPerAspectsSplitter(std::move(MD), EmitOnlyKernelsAsEntryPoints); - while (Splitter->hasMoreSplits()) { - // FIXME: spec constants processing must occur at the lowest level - module_split::ModuleDesc ModuleDesc = Splitter->nextSplit(); - Modules.emplace_back(std::move(ModuleDesc)); - } - } - - bool SplitOccurred = SplitByScope || SplitByDoubleGRF || SplitByESIMD || Modules.size() != MMs.size(); + bool SplitOccurred = + SplitByScope || SplitByDoubleGRF || SplitByESIMD || SplitByProperties; if (IROutputOnly) { if (SplitOccurred) { @@ -879,13 +895,12 @@ processInputModule(std::unique_ptr M) { errs() << "sycl-post-link NOTE: no modifications to the input LLVM IR " "have been made\n"; } - for (module_split::ModuleDesc &IrMD : Modules) { + for (module_split::ModuleDesc &IrMD : MMs) { IrPropSymFilenameTriple T = saveModule(IrMD, ID, OutIRFileName); addTableRow(*Table, T); -++ID; } } - //++ID; + ++ID; } return Table; } From 03a4db567246ca971a4c48009f959fc030464b84 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 07:34:32 -0400 Subject: [PATCH 03/21] Refactor/generalize some code --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 109 ++++++++++++++----- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index f4b0e7ffbb060..f46630cf293bb 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -761,10 +761,85 @@ getDoubleGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { return std::make_unique(std::move(MD), std::move(Groups)); } +namespace { + struct KernelProperties { + KernelProperties() = default; + + KernelProperties(const Function *F) { + if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { + auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, + unsigned OpNo) -> auto { + Constant *C = + cast(N->getOperand(OpNo).get())->getValue(); + return C->getUniqueInteger().getSExtValue(); + }; + + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + Aspects.insert(ExtractIntegerFromMDNodeOperand(MDN, I)); + } + + } + } + + std::string getName() const { + if (Aspects.empty()) + return "no-aspects"; + + std::string Ret = "aspects"; + for (int A : Aspects) { + Ret += "-" + std::to_string(A); + } + return Ret; + } + + SmallSet Aspects; + // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, + // double-grf and other properties + bool operator==(const KernelProperties &Other) const { + if (Aspects.size() != Other.Aspects.size()) + return false; + + if (!llvm::all_of(Other.Aspects, + [&](int Aspect) { return Aspects.contains(Aspect); })) + return false; + + return true; + } + + unsigned hash() const { + llvm::hash_code AspectsHash = + llvm::hash_combine_range(Aspects.begin(), Aspects.end()); + return static_cast(llvm::hash_combine(AspectsHash)); + } + }; + + struct KernelPropertiesAsKeyInfo { + static inline KernelProperties getEmptyKey() { + return KernelProperties{}; + } + + static inline KernelProperties getTombstoneKey() { + // FIXME: is it correct? + return KernelProperties{}; + } + + static unsigned getHashValue(const KernelProperties &Value) { + return Value.hash(); + } + + static bool isEqual(const KernelProperties &LHS, + const KernelProperties &RHS) { + return LHS == RHS; + } + }; +} + std::unique_ptr getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { EntryPointGroupVec Groups; - std::map AspectsToFunctions; + + DenseMap + PropertiesToFunctionsMap; Module &M = MD.getModule(); @@ -775,32 +850,16 @@ getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { continue; } - if (!F.hasMetadata("sycl_used_aspects")) { - AspectsToFunctions["no-aspects"].insert(&F); - } else { - auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, - unsigned OpNo) -> APInt { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); - return C->getUniqueInteger(); - }; - std::string Key = "aspects"; - const MDNode *MDN = F.getMetadata("sycl_used_aspects"); - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { - auto Int = ExtractIntegerFromMDNodeOperand(MDN, I); - SmallVector Str; - Int.toStringUnsigned(Str); - Key += "-" + std::string(Str.data(), Str.size()); - } - AspectsToFunctions[Key].insert(&F); - } + auto Key = KernelProperties(&F); + PropertiesToFunctionsMap[Key].insert(&F); } - if (!AspectsToFunctions.empty()) { - Groups.reserve(AspectsToFunctions.size()); - for (auto &EPG : AspectsToFunctions) { - Groups.emplace_back(EntryPointGroup{ - EPG.first, std::move(EPG.second), MD.getEntryPointGroup().Props}); + if (!PropertiesToFunctionsMap.empty()) { + Groups.reserve(PropertiesToFunctionsMap.size()); + for (auto &EPG : PropertiesToFunctionsMap) { + Groups.emplace_back(EntryPointGroup{EPG.first.getName(), + std::move(EPG.second), + MD.getEntryPointGroup().Props}); } } else { // No entry points met, record this. From f7de0044f439b33bb1d2350f0366e6d963d91f5f Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 08:42:19 -0400 Subject: [PATCH 04/21] Renaming; better names for split modules --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 15 ++++++++------- llvm/tools/sycl-post-link/ModuleSplitter.h | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index f46630cf293bb..8ab35eabaeb51 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -781,11 +781,11 @@ namespace { } } - std::string getName() const { + std::string getName(StringRef BaseName) const { if (Aspects.empty()) - return "no-aspects"; + return BaseName.str() + "-no-aspects"; - std::string Ret = "aspects"; + std::string Ret = BaseName.str() + "-aspects"; for (int A : Aspects) { Ret += "-" + std::to_string(A); } @@ -835,7 +835,7 @@ namespace { } std::unique_ptr -getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { +getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { EntryPointGroupVec Groups; DenseMap @@ -857,14 +857,15 @@ getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { if (!PropertiesToFunctionsMap.empty()) { Groups.reserve(PropertiesToFunctionsMap.size()); for (auto &EPG : PropertiesToFunctionsMap) { - Groups.emplace_back(EntryPointGroup{EPG.first.getName(), - std::move(EPG.second), - MD.getEntryPointGroup().Props}); + Groups.emplace_back(EntryPointGroup{ + EPG.first.getName(MD.getEntryPointGroup().GroupId), + std::move(EPG.second), MD.getEntryPointGroup().Props}); } } else { // No entry points met, record this. Groups.push_back({GLOBAL_SCOPE_NAME, {}}); } + if (Groups.size() > 1) return std::make_unique(std::move(MD), std::move(Groups)); else diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index 4d6f8c2fc96b5..11bf18468e8e4 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -254,7 +254,7 @@ std::unique_ptr getDoubleGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); std::unique_ptr -getPerAspectsSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); +getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3448f6a9db86a..c62ce0a847d9b 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -763,8 +763,8 @@ processInputModule(std::unique_ptr M) { while (ScopedSplitter->hasMoreSplits()) { module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); std::unique_ptr PropertiesSplitter = - module_split::getPerAspectsSplitter(std::move(MD), - EmitOnlyKernelsAsEntryPoints); + module_split::getPropertiesBasedSplitter(std::move(MD), + EmitOnlyKernelsAsEntryPoints); // Here we perform second-level splitting based on device-specific // features used/declared in entry points. From cc0183987c95a3233304f2d6cf01ce359f9bc0c5 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 09:08:37 -0400 Subject: [PATCH 05/21] Move device code split tests into a separate directory --- .../sycl-post-link/{ => device-code-split}/auto-module-split-1.ll | 0 .../sycl-post-link/{ => device-code-split}/auto-module-split-2.ll | 0 .../sycl-post-link/{ => device-code-split}/auto-module-split-3.ll | 0 .../{ => device-code-split}/auto-module-split-func-ptr.ll | 0 .../sycl-post-link/{ => device-code-split}/basic-module-split.ll | 0 .../{ => device-code-split}/one-kernel-per-module.ll | 0 .../{ => device-code-split}/split-with-func-ptrs.ll | 0 .../{ => device-code-split}/split-with-kernel-declarations.ll | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/auto-module-split-1.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/auto-module-split-2.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/auto-module-split-3.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/auto-module-split-func-ptr.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/basic-module-split.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/one-kernel-per-module.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/split-with-func-ptrs.ll (100%) rename llvm/test/tools/sycl-post-link/{ => device-code-split}/split-with-kernel-declarations.ll (100%) diff --git a/llvm/test/tools/sycl-post-link/auto-module-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/auto-module-split-1.ll rename to llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-1.ll diff --git a/llvm/test/tools/sycl-post-link/auto-module-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/auto-module-split-2.ll rename to llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-2.ll diff --git a/llvm/test/tools/sycl-post-link/auto-module-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/auto-module-split-3.ll rename to llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-3.ll diff --git a/llvm/test/tools/sycl-post-link/auto-module-split-func-ptr.ll b/llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/auto-module-split-func-ptr.ll rename to llvm/test/tools/sycl-post-link/device-code-split/auto-module-split-func-ptr.ll diff --git a/llvm/test/tools/sycl-post-link/basic-module-split.ll b/llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/basic-module-split.ll rename to llvm/test/tools/sycl-post-link/device-code-split/basic-module-split.ll diff --git a/llvm/test/tools/sycl-post-link/one-kernel-per-module.ll b/llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/one-kernel-per-module.ll rename to llvm/test/tools/sycl-post-link/device-code-split/one-kernel-per-module.ll diff --git a/llvm/test/tools/sycl-post-link/split-with-func-ptrs.ll b/llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/split-with-func-ptrs.ll rename to llvm/test/tools/sycl-post-link/device-code-split/split-with-func-ptrs.ll diff --git a/llvm/test/tools/sycl-post-link/split-with-kernel-declarations.ll b/llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll similarity index 100% rename from llvm/test/tools/sycl-post-link/split-with-kernel-declarations.ll rename to llvm/test/tools/sycl-post-link/device-code-split/split-with-kernel-declarations.ll From f266933a40c89b895d993b5aed03609c9655ffb3 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 09:08:59 -0400 Subject: [PATCH 06/21] Fix DenseMap helpers --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 30 +++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 8ab35eabaeb51..22aac678aa125 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -795,7 +795,30 @@ namespace { SmallSet Aspects; // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, // double-grf and other properties + + static KernelProperties getTombstone() { + KernelProperties Ret; + Ret.IsTombstoneKey = true; + return Ret; + } + + static KernelProperties getEmpty() { + KernelProperties Ret; + Ret.IsEmpty = true; + return Ret; + } + + private: + // For DenseMap: + bool IsTombstoneKey = false; + bool IsEmpty = false; + + public: bool operator==(const KernelProperties &Other) const { + // Tombstone does not compare equal to any other item + if (IsTombstoneKey || Other.IsTombstoneKey) + return false; + if (Aspects.size() != Other.Aspects.size()) return false; @@ -803,7 +826,7 @@ namespace { [&](int Aspect) { return Aspects.contains(Aspect); })) return false; - return true; + return IsEmpty == Other.IsEmpty; } unsigned hash() const { @@ -815,12 +838,11 @@ namespace { struct KernelPropertiesAsKeyInfo { static inline KernelProperties getEmptyKey() { - return KernelProperties{}; + return KernelProperties::getEmpty(); } static inline KernelProperties getTombstoneKey() { - // FIXME: is it correct? - return KernelProperties{}; + return KernelProperties::getTombstone(); } static unsigned getHashValue(const KernelProperties &Value) { From eca5ca4c874b2aefb5112a7a9322e2aa7594dad7 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 09:10:09 -0400 Subject: [PATCH 07/21] Fix cross module device globals detection --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index c62ce0a847d9b..3e85abd766f24 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -760,6 +760,10 @@ processInputModule(std::unique_ptr M) { SmallVector TopLevelModules; bool SplitByProperties = false; + // FIXME: this check should be performed on all split levels + if (DeviceGlobals) + ScopedSplitter->verifyNoCrossModuleDeviceGlobalUsage(); + while (ScopedSplitter->hasMoreSplits()) { module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); std::unique_ptr PropertiesSplitter = @@ -782,10 +786,6 @@ processInputModule(std::unique_ptr M) { Modified |= SplitByScope; Modified |= SplitByProperties; - // FIXME: this check should be performed on all split levels - if (DeviceGlobals) - ScopedSplitter->verifyNoCrossModuleDeviceGlobalUsage(); - // TODO this nested splitting scheme will not scale well when other split // "dimensions" will be added. Some infra/"split manager" needs to be // implemented in this case - e.g. all needed splitters are registered, then From 731f6f7ce9390579ddc114b74f6d6410dd36f1d2 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 4 Nov 2022 09:13:53 -0400 Subject: [PATCH 08/21] revert changes in device_requirments test --- .../sycl-post-link/device-requirements/device-requirements.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll b/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll index 891033091a5cc..aff8c8939b4e5 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/device-requirements.ll @@ -40,7 +40,7 @@ $_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_ = comdat any $_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_ = comdat any ; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !43 !sycl_used_aspects !44 { +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE_() #0 comdat !kernel_arg_buffer_location !43 { entry: %__SYCLKernel = alloca %class.anon, align 1 %__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)* @@ -66,7 +66,7 @@ entry: } ; Function Attrs: convergent mustprogress noinline norecurse optnone -define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_() #0 comdat !kernel_arg_buffer_location !43 !sycl_used_aspects !45 { +define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlvE0_() #0 comdat !kernel_arg_buffer_location !43 { entry: %__SYCLKernel = alloca %class.anon, align 1 %__SYCLKernel.ascast = addrspacecast %class.anon* %__SYCLKernel to %class.anon addrspace(4)* From df73f90392c3373175aa189bee2d77b7495b1aa2 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 7 Nov 2022 09:35:34 -0500 Subject: [PATCH 09/21] Add a couple of LIT tests --- .../device-code-split/per-aspect-split-1.ll | 133 ++++++++++++++++++ .../device-code-split/per-aspect-split-2.ll | 51 +++++++ 2 files changed, 184 insertions(+) create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll new file mode 100644 index 0000000000000..5c8fcfdca2385 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -0,0 +1,133 @@ +; This test emulates two translation units with 3 kernels: +; TU0_kernel0 - 1st translation unit, no aspects used +; TU0_kernel1 - 1st translation unit, aspect 1 is used +; TU1_kernel2 - 2nd translation unit, no aspects used + +; The test is intended to check that sycl-post-link correctly separates kernels +; that use aspects from kernels which doesn't use aspects regardless of device +; code split mode + +; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 + +; RUN: sycl-post-link -split=source -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 + +; RUN: sycl-post-link -split=kernel -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-M0-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.ll --check-prefixes CHECK-M1-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefixes CHECK-M2-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefixes CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 + +; Regardless of device code split mode, each kernel should go into a separate +; device image + +; CHECK-M2-IR: define {{.*}} @TU0_kernel0 +; CHECK-M2-SYMS: TU0_kernel0 + +; CHECK-M1-IR: define {{.*}} @TU0_kernel1 +; CHECK-M1-SYMS: TU0_kernel1 + +; CHECK-M0-IR: define {{.*}} @TU1_kernel2 +; CHECK-M0-SYMS: TU1_kernel2 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +; FIXME: device globals should also be properly distributed across device images +; if they are of optional type +@_ZL2GV = internal addrspace(1) constant [1 x i32] [i32 42], align 4 + +define dso_local spir_kernel void @TU0_kernel0() #0 { +entry: + call spir_func void @foo() + ret void +} + +define dso_local spir_func void @foo() { +entry: + %a = alloca i32, align 4 + %call = call spir_func i32 @bar(i32 1) + %add = add nsw i32 2, %call + store i32 %add, i32* %a, align 4 + ret void +} + +; Function Attrs: nounwind +define linkonce_odr dso_local spir_func i32 @bar(i32 %arg) { +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, i32* %arg.addr, align 4 + %0 = load i32, i32* %arg.addr, align 4 + ret i32 %0 +} + +define dso_local spir_kernel void @TU0_kernel1() #0 !sycl_used_aspects !2 { +entry: + call spir_func void @foo1() + ret void +} + +; Function Attrs: nounwind +define dso_local spir_func void @foo1() { +entry: + %a = alloca i32, align 4 + store i32 2, i32* %a, align 4 + ret void +} + +define dso_local spir_kernel void @TU1_kernel2() #1 { +entry: + call spir_func void @foo2() + ret void +} + +; Function Attrs: nounwind +define dso_local spir_func void @foo2() { +entry: + %a = alloca i32, align 4 + %0 = load i32, i32 addrspace(4)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(4)* addrspacecast ([1 x i32] addrspace(1)* @_ZL2GV to [1 x i32] addrspace(4)*), i64 0, i64 0), align 4 + %add = add nsw i32 4, %0 + store i32 %add, i32* %a, align 4 + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } + +!opencl.spir.version = !{!0, !0} +!spirv.Source = !{!1, !1} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{i32 1} diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll new file mode 100644 index 0000000000000..ef5aa84c0fdf0 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -0,0 +1,51 @@ +; The test is intended to check that sycl-post-link correctly groups kernels +; by unique sets of aspects used in them + +; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M1-SYMS +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS + +; FIXME: We expect to see exactly three modules +; CHECK-TABLE: Code +; CHECK-TABLE-NEXT: _0.sym +; CHECK-TABLE-NEXT: _1.sym +; CHECK-TABLE-NEXT: _2.sym +; CHECK-TABLE-NEXT: _3.sym +; CHECK-TABLE-EMPTY: + +; CHECK-M0-SYMS: kernel2 +; CHECK-M1-SYMS: kernel3 +; CHECK-M2-SYMS: kernel0 +; CHECK-M3-SYMS: kernel1 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +define dso_local spir_kernel void @kernel0() #0 !sycl_used_aspects !1 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel1() #0 !sycl_used_aspects !2 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel2() #0 !sycl_used_aspects !3 { +entry: + ret void +} + +define dso_local spir_kernel void @kernel3() #0 !sycl_used_aspects !4 { +entry: + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } + +!1 = !{i32 1} +!2 = !{i32 1, i32 2} +!3 = !{i32 2, i32 1} +!4 = !{i32 2, i32 3, i32 4} From 18ba12c8fc05aa6facfe4e630aaac75e11f405d9 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 11 Nov 2022 09:40:47 -0500 Subject: [PATCH 10/21] KernelProperties -> UsedOptionalFeatures --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 42 +++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 22aac678aa125..9e00bf723e061 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -762,10 +762,14 @@ getDoubleGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { } namespace { - struct KernelProperties { - KernelProperties() = default; - - KernelProperties(const Function *F) { + // Data structure, which represent a combination of all possible optional + // features used in a function. + // + // It has extra methods to be useable as a key in llvm::DenseMap. + struct UsedOptionalFeatures { + UsedOptionalFeatures() = default; + + UsedOptionalFeatures(const Function *F) { if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, unsigned OpNo) -> auto { @@ -796,14 +800,14 @@ namespace { // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, // double-grf and other properties - static KernelProperties getTombstone() { - KernelProperties Ret; + static UsedOptionalFeatures getTombstone() { + UsedOptionalFeatures Ret; Ret.IsTombstoneKey = true; return Ret; } - static KernelProperties getEmpty() { - KernelProperties Ret; + static UsedOptionalFeatures getEmpty() { + UsedOptionalFeatures Ret; Ret.IsEmpty = true; return Ret; } @@ -814,7 +818,7 @@ namespace { bool IsEmpty = false; public: - bool operator==(const KernelProperties &Other) const { + bool operator==(const UsedOptionalFeatures &Other) const { // Tombstone does not compare equal to any other item if (IsTombstoneKey || Other.IsTombstoneKey) return false; @@ -836,21 +840,21 @@ namespace { } }; - struct KernelPropertiesAsKeyInfo { - static inline KernelProperties getEmptyKey() { - return KernelProperties::getEmpty(); + struct UsedOptionalFeaturesAsKeyInfo { + static inline UsedOptionalFeatures getEmptyKey() { + return UsedOptionalFeatures::getEmpty(); } - static inline KernelProperties getTombstoneKey() { - return KernelProperties::getTombstone(); + static inline UsedOptionalFeatures getTombstoneKey() { + return UsedOptionalFeatures::getTombstone(); } - static unsigned getHashValue(const KernelProperties &Value) { + static unsigned getHashValue(const UsedOptionalFeatures &Value) { return Value.hash(); } - static bool isEqual(const KernelProperties &LHS, - const KernelProperties &RHS) { + static bool isEqual(const UsedOptionalFeatures &LHS, + const UsedOptionalFeatures &RHS) { return LHS == RHS; } }; @@ -860,7 +864,7 @@ std::unique_ptr getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { EntryPointGroupVec Groups; - DenseMap + DenseMap PropertiesToFunctionsMap; Module &M = MD.getModule(); @@ -872,7 +876,7 @@ getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { continue; } - auto Key = KernelProperties(&F); + auto Key = UsedOptionalFeatures(&F); PropertiesToFunctionsMap[Key].insert(&F); } From 7d7fd45eead5c53ff1eb4f96a19740f8e145b66d Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 11 Nov 2022 09:43:07 -0500 Subject: [PATCH 11/21] Cache UsedOptionalFeatures hash --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 9e00bf723e061..1e23083c894d1 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -783,6 +783,10 @@ namespace { } } + + llvm::hash_code AspectsHash = + llvm::hash_combine_range(Aspects.begin(), Aspects.end()); + Hash = static_cast(llvm::hash_combine(AspectsHash)); } std::string getName(StringRef BaseName) const { @@ -814,6 +818,7 @@ namespace { private: // For DenseMap: + llvm::hash_code Hash = {}; bool IsTombstoneKey = false; bool IsEmpty = false; @@ -834,9 +839,7 @@ namespace { } unsigned hash() const { - llvm::hash_code AspectsHash = - llvm::hash_combine_range(Aspects.begin(), Aspects.end()); - return static_cast(llvm::hash_combine(AspectsHash)); + return static_cast(Hash); } }; From bd2ae95d9adb99ced823b1b9084e547940850c15 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 11 Nov 2022 10:24:26 -0500 Subject: [PATCH 12/21] Aspects order shouldn't matter --- .../device-code-split/per-aspect-split-2.ll | 14 +++++++++----- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 19 ++++++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll index ef5aa84c0fdf0..ff9977d2d72d9 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -12,13 +12,17 @@ ; CHECK-TABLE-NEXT: _0.sym ; CHECK-TABLE-NEXT: _1.sym ; CHECK-TABLE-NEXT: _2.sym -; CHECK-TABLE-NEXT: _3.sym ; CHECK-TABLE-EMPTY: -; CHECK-M0-SYMS: kernel2 -; CHECK-M1-SYMS: kernel3 -; CHECK-M2-SYMS: kernel0 -; CHECK-M3-SYMS: kernel1 +; CHECK-M0-SYMS: kernel3 +; CHECK-M0-SYMS-EMPTY: + +; CHECK-M1-SYMS: kernel0 +; CHECK-M1-SYMS-EMPTY: + +; CHECK-M2-SYMS: kernel1 +; CHECK-M2-SYMS: kernel2 +; CHECK-M2-SYMS-EMPTY: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 1e23083c894d1..50151c2559c22 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -767,6 +767,10 @@ namespace { // // It has extra methods to be useable as a key in llvm::DenseMap. struct UsedOptionalFeatures { + SmallVector Aspects; + // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, + // double-grf and other properties + UsedOptionalFeatures() = default; UsedOptionalFeatures(const Function *F) { @@ -779,9 +783,9 @@ namespace { }; for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { - Aspects.insert(ExtractIntegerFromMDNodeOperand(MDN, I)); + Aspects.push_back(ExtractIntegerFromMDNodeOperand(MDN, I)); } - + llvm::sort(Aspects); } llvm::hash_code AspectsHash = @@ -800,10 +804,6 @@ namespace { return Ret; } - SmallSet Aspects; - // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, - // double-grf and other properties - static UsedOptionalFeatures getTombstone() { UsedOptionalFeatures Ret; Ret.IsTombstoneKey = true; @@ -831,9 +831,10 @@ namespace { if (Aspects.size() != Other.Aspects.size()) return false; - if (!llvm::all_of(Other.Aspects, - [&](int Aspect) { return Aspects.contains(Aspect); })) - return false; + for (size_t I = 0, E = Aspects.size(); I != E; ++I) { + if (Aspects[I] != Other.Aspects[I]) + return false; + } return IsEmpty == Other.IsEmpty; } From 496ee74f8ad4b597d1dc562f11e041fdfe90f2ea Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Fri, 11 Nov 2022 11:01:52 -0500 Subject: [PATCH 13/21] More renamings --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 3 ++- llvm/tools/sycl-post-link/ModuleSplitter.h | 3 ++- llvm/tools/sycl-post-link/sycl-post-link.cpp | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index f495cfde81b0c..75ad02fd86b20 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -865,7 +865,8 @@ namespace { } std::unique_ptr -getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { +getSplitterByOptionalFeatures(ModuleDesc &&MD, + bool EmitOnlyKernelsAsEntryPoints) { EntryPointGroupVec Groups; DenseMap diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.h b/llvm/tools/sycl-post-link/ModuleSplitter.h index 6355e127f39cb..31a70d99c9dc5 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.h +++ b/llvm/tools/sycl-post-link/ModuleSplitter.h @@ -254,7 +254,8 @@ std::unique_ptr getLargeGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); std::unique_ptr -getPropertiesBasedSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints); +getSplitterByOptionalFeatures(ModuleDesc &&MD, + bool EmitOnlyKernelsAsEntryPoints); #ifndef NDEBUG void dumpEntryPoints(const EntryPointSet &C, const char *msg = "", int Tab = 0); diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 9b37bb696a59a..cf278b3d59bca 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -758,7 +758,7 @@ processInputModule(std::unique_ptr M) { EmitOnlyKernelsAsEntryPoints); SmallVector TopLevelModules; - bool SplitByProperties = false; + bool SplitByOptionalFeatures = false; // FIXME: this check should be performed on all split levels if (DeviceGlobals) @@ -766,25 +766,25 @@ processInputModule(std::unique_ptr M) { while (ScopedSplitter->hasMoreSplits()) { module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); - std::unique_ptr PropertiesSplitter = - module_split::getPropertiesBasedSplitter(std::move(MD), - EmitOnlyKernelsAsEntryPoints); + std::unique_ptr OptionalFeaturesSplitter = + module_split::getSplitterByOptionalFeatures( + std::move(MD), EmitOnlyKernelsAsEntryPoints); // Here we perform second-level splitting based on device-specific // features used/declared in entry points. // This step is mandatory, because it is required for functional // correctness, i.e. to prevent speculative compilation of kernels that use // optional features on a HW which doesn't support them. - while (PropertiesSplitter->hasMoreSplits()) { - TopLevelModules.emplace_back(PropertiesSplitter->nextSplit()); + while (OptionalFeaturesSplitter->hasMoreSplits()) { + TopLevelModules.emplace_back(OptionalFeaturesSplitter->nextSplit()); } - SplitByProperties |= PropertiesSplitter->totalSplits() > 1; + SplitByOptionalFeatures |= OptionalFeaturesSplitter->totalSplits() > 1; } const bool SplitByScope = ScopedSplitter->totalSplits() > 1; Modified |= SplitByScope; - Modified |= SplitByProperties; + Modified |= SplitByOptionalFeatures; // TODO this nested splitting scheme will not scale well when other split // "dimensions" will be added. Some infra/"split manager" needs to be @@ -874,8 +874,8 @@ processInputModule(std::unique_ptr M) { Modified = true; } - bool SplitOccurred = - SplitByScope || SplitByLargeGRF || SplitByESIMD || SplitByProperties; + bool SplitOccurred = SplitByScope || SplitByLargeGRF || SplitByESIMD || + SplitByOptionalFeatures; if (IROutputOnly) { if (SplitOccurred) { From c3b975db7e4605c1daf3a1bee3fd6ba625aa0bbc Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 15 Nov 2022 04:06:09 -0500 Subject: [PATCH 14/21] Make test more strict --- .../sycl-post-link/device-code-split/per-aspect-split-1.ll | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll index 5c8fcfdca2385..6fd05a2f1c8ad 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -54,12 +54,15 @@ ; CHECK-M2-IR: define {{.*}} @TU0_kernel0 ; CHECK-M2-SYMS: TU0_kernel0 +; CHECK-M2-SYMS-EMPTY: ; CHECK-M1-IR: define {{.*}} @TU0_kernel1 ; CHECK-M1-SYMS: TU0_kernel1 +; CHECK-M1-SYMS-EMPTY: ; CHECK-M0-IR: define {{.*}} @TU1_kernel2 ; CHECK-M0-SYMS: TU1_kernel2 +; CHECK-M0-SYMS-EMPTY: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" From 8cac6b870ad62a128173f07e90b1c4609f03946e Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Tue, 15 Nov 2022 10:25:11 -0500 Subject: [PATCH 15/21] Apply clang-format --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 164 +++++++++---------- 1 file changed, 81 insertions(+), 83 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 75ad02fd86b20..f5f9f1e6b51e5 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -762,107 +762,105 @@ getLargeGRFSplitter(ModuleDesc &&MD, bool EmitOnlyKernelsAsEntryPoints) { } namespace { - // Data structure, which represent a combination of all possible optional - // features used in a function. - // - // It has extra methods to be useable as a key in llvm::DenseMap. - struct UsedOptionalFeatures { - SmallVector Aspects; - // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, - // double-grf and other properties - - UsedOptionalFeatures() = default; - - UsedOptionalFeatures(const Function *F) { - if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { - auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, - unsigned OpNo) -> auto { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); - return C->getUniqueInteger().getSExtValue(); - }; - - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { - Aspects.push_back(ExtractIntegerFromMDNodeOperand(MDN, I)); - } - llvm::sort(Aspects); +// Data structure, which represent a combination of all possible optional +// features used in a function. +// +// It has extra methods to be useable as a key in llvm::DenseMap. +struct UsedOptionalFeatures { + SmallVector Aspects; + // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, + // double-grf and other properties + + UsedOptionalFeatures() = default; + + UsedOptionalFeatures(const Function *F) { + if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { + auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, + unsigned OpNo) -> auto { + Constant *C = + cast(N->getOperand(OpNo).get())->getValue(); + return C->getUniqueInteger().getSExtValue(); + }; + + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + Aspects.push_back(ExtractIntegerFromMDNodeOperand(MDN, I)); } - - llvm::hash_code AspectsHash = - llvm::hash_combine_range(Aspects.begin(), Aspects.end()); - Hash = static_cast(llvm::hash_combine(AspectsHash)); + llvm::sort(Aspects); } - std::string getName(StringRef BaseName) const { - if (Aspects.empty()) - return BaseName.str() + "-no-aspects"; + llvm::hash_code AspectsHash = + llvm::hash_combine_range(Aspects.begin(), Aspects.end()); + Hash = static_cast(llvm::hash_combine(AspectsHash)); + } - std::string Ret = BaseName.str() + "-aspects"; - for (int A : Aspects) { - Ret += "-" + std::to_string(A); - } - return Ret; - } + std::string getName(StringRef BaseName) const { + if (Aspects.empty()) + return BaseName.str() + "-no-aspects"; - static UsedOptionalFeatures getTombstone() { - UsedOptionalFeatures Ret; - Ret.IsTombstoneKey = true; - return Ret; + std::string Ret = BaseName.str() + "-aspects"; + for (int A : Aspects) { + Ret += "-" + std::to_string(A); } + return Ret; + } - static UsedOptionalFeatures getEmpty() { - UsedOptionalFeatures Ret; - Ret.IsEmpty = true; - return Ret; - } + static UsedOptionalFeatures getTombstone() { + UsedOptionalFeatures Ret; + Ret.IsTombstoneKey = true; + return Ret; + } - private: - // For DenseMap: - llvm::hash_code Hash = {}; - bool IsTombstoneKey = false; - bool IsEmpty = false; + static UsedOptionalFeatures getEmpty() { + UsedOptionalFeatures Ret; + Ret.IsEmpty = true; + return Ret; + } - public: - bool operator==(const UsedOptionalFeatures &Other) const { - // Tombstone does not compare equal to any other item - if (IsTombstoneKey || Other.IsTombstoneKey) - return false; +private: + // For DenseMap: + llvm::hash_code Hash = {}; + bool IsTombstoneKey = false; + bool IsEmpty = false; - if (Aspects.size() != Other.Aspects.size()) - return false; +public: + bool operator==(const UsedOptionalFeatures &Other) const { + // Tombstone does not compare equal to any other item + if (IsTombstoneKey || Other.IsTombstoneKey) + return false; - for (size_t I = 0, E = Aspects.size(); I != E; ++I) { - if (Aspects[I] != Other.Aspects[I]) - return false; - } + if (Aspects.size() != Other.Aspects.size()) + return false; - return IsEmpty == Other.IsEmpty; + for (size_t I = 0, E = Aspects.size(); I != E; ++I) { + if (Aspects[I] != Other.Aspects[I]) + return false; } - unsigned hash() const { - return static_cast(Hash); - } - }; + return IsEmpty == Other.IsEmpty; + } - struct UsedOptionalFeaturesAsKeyInfo { - static inline UsedOptionalFeatures getEmptyKey() { - return UsedOptionalFeatures::getEmpty(); - } + unsigned hash() const { return static_cast(Hash); } +}; - static inline UsedOptionalFeatures getTombstoneKey() { - return UsedOptionalFeatures::getTombstone(); - } +struct UsedOptionalFeaturesAsKeyInfo { + static inline UsedOptionalFeatures getEmptyKey() { + return UsedOptionalFeatures::getEmpty(); + } - static unsigned getHashValue(const UsedOptionalFeatures &Value) { - return Value.hash(); - } + static inline UsedOptionalFeatures getTombstoneKey() { + return UsedOptionalFeatures::getTombstone(); + } - static bool isEqual(const UsedOptionalFeatures &LHS, - const UsedOptionalFeatures &RHS) { - return LHS == RHS; - } - }; -} + static unsigned getHashValue(const UsedOptionalFeatures &Value) { + return Value.hash(); + } + + static bool isEqual(const UsedOptionalFeatures &LHS, + const UsedOptionalFeatures &RHS) { + return LHS == RHS; + } +}; +} // namespace std::unique_ptr getSplitterByOptionalFeatures(ModuleDesc &&MD, From 4be79277c37cbaa15a7b9596fd88c6c238ebf85d Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 16 Nov 2022 04:16:58 -0500 Subject: [PATCH 16/21] Refactor tests a bit --- .../device-code-split/per-aspect-split-1.ll | 3 --- .../device-code-split/per-aspect-split-2.ll | 18 +++++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll index 6fd05a2f1c8ad..5c8fcfdca2385 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-1.ll @@ -54,15 +54,12 @@ ; CHECK-M2-IR: define {{.*}} @TU0_kernel0 ; CHECK-M2-SYMS: TU0_kernel0 -; CHECK-M2-SYMS-EMPTY: ; CHECK-M1-IR: define {{.*}} @TU0_kernel1 ; CHECK-M1-SYMS: TU0_kernel1 -; CHECK-M1-SYMS-EMPTY: ; CHECK-M0-IR: define {{.*}} @TU1_kernel2 ; CHECK-M0-SYMS: TU1_kernel2 -; CHECK-M0-SYMS-EMPTY: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll index ff9977d2d72d9..c36601746a741 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -3,26 +3,30 @@ ; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M1-SYMS -; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS +; +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel1 \ +; RUN: --implicit-check-not kernel2 +; +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M1-SYMS \ +; RUN: --implicit-check-not kernel3 --implicit-check-not kernel1 \ +; RUN: --implicit-check-not kernel2 +; +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3 ; FIXME: We expect to see exactly three modules ; CHECK-TABLE: Code ; CHECK-TABLE-NEXT: _0.sym ; CHECK-TABLE-NEXT: _1.sym ; CHECK-TABLE-NEXT: _2.sym -; CHECK-TABLE-EMPTY: ; CHECK-M0-SYMS: kernel3 -; CHECK-M0-SYMS-EMPTY: ; CHECK-M1-SYMS: kernel0 -; CHECK-M1-SYMS-EMPTY: ; CHECK-M2-SYMS: kernel1 ; CHECK-M2-SYMS: kernel2 -; CHECK-M2-SYMS-EMPTY: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" target triple = "spir64-unknown-linux" From 01c88f4d235d4969cdd34c370fb3004d92c72462 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 16 Nov 2022 05:42:41 -0500 Subject: [PATCH 17/21] Add test for SYCL_EXTERNAL functions --- .../device-code-split/per-aspect-split-2.ll | 2 +- .../device-code-split/per-aspect-split-3.ll | 74 +++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll index c36601746a741..ff9aa0f29376e 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-2.ll @@ -15,11 +15,11 @@ ; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \ ; RUN: --implicit-check-not kernel0 --implicit-check-not kernel3 -; FIXME: We expect to see exactly three modules ; CHECK-TABLE: Code ; CHECK-TABLE-NEXT: _0.sym ; CHECK-TABLE-NEXT: _1.sym ; CHECK-TABLE-NEXT: _2.sym +; CHECK-TABLE-EMPTY: ; CHECK-M0-SYMS: kernel3 diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll new file mode 100644 index 0000000000000..b4d414a9696f4 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll @@ -0,0 +1,74 @@ +; This test is intended to check that per-aspect device code split works as +; expected with SYCL_EXTERNAL functions + +; RUN: sycl-post-link -split=auto -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE +; +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefix CHECK-M0-SYMS \ +; RUN: --implicit-check-not foo --implicit-check-not kernel1 +; +; RUN: FileCheck %s -input-file=%t_1.sym --check-prefix CHECK-M1-SYMS \ +; RUN: --implicit-check-not foo --implicit-check-not kernel0 +; +; RUN: FileCheck %s -input-file=%t_2.sym --check-prefix CHECK-M2-SYMS \ +; RUN: --implicit-check-not kernel0 --implicit-check-not foo \ +; RUN: --implicit-check-not bar +; +; RUN: FileCheck %s -input-file=%t_2.ll --check-prefix CHECK-M2-IR \ +; RUN: --implicit-check-not kernel0 --implicit-check-not bar + +; We expect to see 3 modules generated: +; +; CHECK-TABLE: Code +; CHECK-TABLE-NEXT: _0.sym +; CHECK-TABLE-NEXT: _1.sym +; CHECK-TABLE-NEXT: _2.sym +; CHECK-TABLE-EMPTY: + +; Both @bar and @kernel0 use the same aspects and contained within the same +; translation unit, so they should be bundled together. +; +; CHECK-M0-SYMS: bar +; CHECK-M0-SYMS: kernel0 + +; @foo is a SYCL_EXTERNAL function, it should be exported and outlined into a +; separate translation unit because of used aspects +; +; CHECK-M1-SYMS: foo + +; CHECK-M2-SYMS: kernel1 +; +; @kernel1 uses @foo and therefore @foo should be present in the same module as +; @kernel1 as well +; CHECK-M2-IR-DAG: define spir_func void @foo +; CHECK-M2-IR-DAG: define spir_kernel void @kernel1 + + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +define spir_func void @foo() #0 !sycl_used_aspects !1 { + ret void +} + +define spir_func void @bar() #1 !sycl_used_aspects !2 { + ret void +} + +define spir_kernel void @kernel0() #1 !sycl_used_aspects !2 { +entry: + ret void +} + +define spir_kernel void @kernel1() #0 !sycl_used_aspects !3 { +entry: + call void @foo() + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } + +!1 = !{i32 1} +!2 = !{i32 2} +!3 = !{i32 3, i32 1} From 12e26226adf7f812d11de501c8cb7564385273a7 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 17 Nov 2022 08:33:50 -0500 Subject: [PATCH 18/21] Do not always perform per-aspect split --- .../device-code-split/per-aspect-split-4.ll | 55 +++++++++++++++++++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 7 +++ 2 files changed, 62 insertions(+) create mode 100644 llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll new file mode 100644 index 0000000000000..96972687a1e7f --- /dev/null +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-4.ll @@ -0,0 +1,55 @@ +; This test is intended to check that we do not perform per-aspect split if +; it was disabled through one or another sycl-post-link option + +; RUN: sycl-post-link -symbols -S %s -o %t.table +; RUN: FileCheck %s -input-file=%t.table --check-prefix CHECK-TABLE +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefix CHECK-IR +; +; -lower-esimd is needed so sycl-post-link does not complain about no actions +; specified +; RUN: sycl-post-link -lower-esimd -ir-output-only -S %s -o %t.ll +; RUN: FileCheck %s -input-file=%t.ll --check-prefix CHECK-IR + +; We expect to see only one module generated: +; +; CHECK-TABLE: Code +; CHECK-TABLE-NEXT: _0.ll +; CHECK-TABLE-EMPTY: + +; Regardless of used aspects and sycl-module-id metadata, all kernel and +; functions should still be present. + +; CHECK-IR-DAG: define spir_func void @foo +; CHECK-IR-DAG: define spir_func void @bar +; CHECK-IR-DAG: define spir_kernel void @kernel0 +; CHECK-IR-DAG: define spir_kernel void @kernel1 + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +target triple = "spir64-unknown-linux" + +define spir_func void @foo() #0 !sycl_used_aspects !1 { + ret void +} + +define spir_func void @bar() #1 !sycl_used_aspects !2 { + ret void +} + +define spir_kernel void @kernel0() #1 !sycl_used_aspects !2 { +entry: + ret void +} + +define spir_kernel void @kernel1() #0 !sycl_used_aspects !3 { +entry: + call void @foo() + ret void +} + +attributes #0 = { "sycl-module-id"="TU1.cpp" } +attributes #1 = { "sycl-module-id"="TU2.cpp" } + +!1 = !{i32 1} +!2 = !{i32 2} +!3 = !{i32 3, i32 1} + diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index cf278b3d59bca..48314308dc3dd 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -766,6 +766,13 @@ processInputModule(std::unique_ptr M) { while (ScopedSplitter->hasMoreSplits()) { module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); + + if (IROutputOnly || SplitMode == module_split::SPLIT_NONE) { + // We can't perform any kind of split + TopLevelModules.emplace_back(std::move(MD)); + continue; + } + std::unique_ptr OptionalFeaturesSplitter = module_split::getSplitterByOptionalFeatures( std::move(MD), EmitOnlyKernelsAsEntryPoints); From b39798bc075020de5c2621e0254a8dac46466f6d Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 17 Nov 2022 08:36:42 -0500 Subject: [PATCH 19/21] Apply review comments --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index f5f9f1e6b51e5..8c19cd826b2be 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -782,6 +782,8 @@ struct UsedOptionalFeatures { return C->getUniqueInteger().getSExtValue(); }; + // !sycl_used_aspects is supposed to contain unique values, no duplicates + // are expected here for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { Aspects.push_back(ExtractIntegerFromMDNodeOperand(MDN, I)); } @@ -880,7 +882,7 @@ getSplitterByOptionalFeatures(ModuleDesc &&MD, } auto Key = UsedOptionalFeatures(&F); - PropertiesToFunctionsMap[Key].insert(&F); + PropertiesToFunctionsMap[std::move(Key)].insert(&F); } if (!PropertiesToFunctionsMap.empty()) { From 0cc45b2c1a04551249551bf104bd99b559a55d83 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 21 Nov 2022 08:54:27 -0500 Subject: [PATCH 20/21] Apply comments --- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 24 ++++++++------------ llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 8c19cd826b2be..db16ea177081f 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -775,18 +775,15 @@ struct UsedOptionalFeatures { UsedOptionalFeatures(const Function *F) { if (const MDNode *MDN = F->getMetadata("sycl_used_aspects")) { - auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, - unsigned OpNo) -> auto { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); + auto ExtractIntegerFromMDNodeOperand = [=](const MDOperand &N) { + Constant *C = cast(N.get())->getValue(); return C->getUniqueInteger().getSExtValue(); }; // !sycl_used_aspects is supposed to contain unique values, no duplicates // are expected here - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { - Aspects.push_back(ExtractIntegerFromMDNodeOperand(MDN, I)); - } + llvm::transform(MDN->operands(), std::back_inserter(Aspects), + ExtractIntegerFromMDNodeOperand); llvm::sort(Aspects); } @@ -885,16 +882,15 @@ getSplitterByOptionalFeatures(ModuleDesc &&MD, PropertiesToFunctionsMap[std::move(Key)].insert(&F); } - if (!PropertiesToFunctionsMap.empty()) { + if (PropertiesToFunctionsMap.empty()) { + // No entry points met, record this. + Groups.emplace_back(GLOBAL_SCOPE_NAME, EntryPointSet{}); + } else { Groups.reserve(PropertiesToFunctionsMap.size()); for (auto &EPG : PropertiesToFunctionsMap) { - Groups.emplace_back(EntryPointGroup{ - EPG.first.getName(MD.getEntryPointGroup().GroupId), - std::move(EPG.second), MD.getEntryPointGroup().Props}); + Groups.emplace_back(EPG.first.getName(MD.getEntryPointGroup().GroupId), + std::move(EPG.second), MD.getEntryPointGroup().Props); } - } else { - // No entry points met, record this. - Groups.push_back({GLOBAL_SCOPE_NAME, {}}); } if (Groups.size() > 1) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 48314308dc3dd..5a99dbe22b861 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -768,7 +768,7 @@ processInputModule(std::unique_ptr M) { module_split::ModuleDesc MD = ScopedSplitter->nextSplit(); if (IROutputOnly || SplitMode == module_split::SPLIT_NONE) { - // We can't perform any kind of split + // We can't perform any kind of split. TopLevelModules.emplace_back(std::move(MD)); continue; } @@ -800,7 +800,7 @@ processInputModule(std::unique_ptr M) { // "leaf" ModuleDesc's resulted from splitting. Some bookkeeping is needed for // ESIMD splitter to link back needed modules. - // Based on results from a top-level splitting, we perform some lower-level + // Based on results from the top-level splitting, we perform some lower-level // splitting for various unique features. for (module_split::ModuleDesc &MDesc : TopLevelModules) { DUMP_ENTRY_POINTS(MDesc.entries(), MDesc.Name.c_str(), 1); From 61ec5e95dd84c55731fa970954e8a563fceaa565 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Mon, 21 Nov 2022 14:03:54 -0500 Subject: [PATCH 21/21] Apply comments --- .../device-code-split/per-aspect-split-3.ll | 32 +++++++++++++++---- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll index b4d414a9696f4..5fa587abca234 100644 --- a/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll +++ b/llvm/test/tools/sycl-post-link/device-code-split/per-aspect-split-3.ll @@ -25,17 +25,32 @@ ; CHECK-TABLE-NEXT: _2.sym ; CHECK-TABLE-EMPTY: -; Both @bar and @kernel0 use the same aspects and contained within the same -; translation unit, so they should be bundled together. +; sycl-post-link aims to achieve two goals while doing splitting: +; - each kernel must be self-contained, i.e. all functions called from a +; kernel must reside in the same device image +; - each entry point should be assigned to a correct device image in +; accordance with selected device code split mode +; +; In this test @bar and @foo are SYCL_EXTERNAL functions and they are treated +; as entry points. +; +; @bar uses the same list of aspects as @kernel0 which calls it and therefore +; they can be put into the same device image. There also goes @baz, because of +; the same list of used aspects. ; ; CHECK-M0-SYMS: bar +; CHECK-M0-SYMS: baz ; CHECK-M0-SYMS: kernel0 - -; @foo is a SYCL_EXTERNAL function, it should be exported and outlined into a -; separate translation unit because of used aspects +; +; List of aspects used by @foo is different from the one attached to @kernel1 +; which calls @foo (for example, @kernel1 uses an extra optional feature besides +; ones used in @foo). As a result, @foo should be both included into the same +; device image as @kernel1 to make it self contained, but at the same time it +; should also present in a separate device image, because it is an entry point +; with unique set of used aspects. ; ; CHECK-M1-SYMS: foo - +; ; CHECK-M2-SYMS: kernel1 ; ; @kernel1 uses @foo and therefore @foo should be present in the same module as @@ -55,8 +70,13 @@ define spir_func void @bar() #1 !sycl_used_aspects !2 { ret void } +define spir_func void @baz() #1 !sycl_used_aspects !2 { + ret void +} + define spir_kernel void @kernel0() #1 !sycl_used_aspects !2 { entry: + call void @bar() ret void } diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index db16ea177081f..1ce47af2f0b74 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -769,7 +769,7 @@ namespace { struct UsedOptionalFeatures { SmallVector Aspects; // TODO: extend this further with reqd-sub-group-size, reqd-work-group-size, - // double-grf and other properties + // large-grf and other properties UsedOptionalFeatures() = default; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 5a99dbe22b861..eb65da70c7d1c 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -805,7 +805,7 @@ processInputModule(std::unique_ptr M) { for (module_split::ModuleDesc &MDesc : TopLevelModules) { DUMP_ENTRY_POINTS(MDesc.entries(), MDesc.Name.c_str(), 1); - // FIXME: double grf should be handled by properties splitter above + // FIXME: large grf should be handled by properties splitter above std::unique_ptr LargeGRFSplitter = module_split::getLargeGRFSplitter(std::move(MDesc), EmitOnlyKernelsAsEntryPoints);