From 6fcfe5275039a6d57726e4e9663446856851aea4 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 6 Mar 2025 06:53:39 -0800 Subject: [PATCH 01/19] [SYCL] Move kernel_compiler related information This commit moves the kernel compiler related information from the kernel bundles to the device images. This separation allows the implementation to properly joining and (in the near future) linking of kernel bundles created from different paths. Signed-off-by: Larsen, Steffen --- sycl/source/backend.cpp | 3 +- sycl/source/detail/device_image_impl.hpp | 572 +++++++++++++++- sycl/source/detail/graph_impl.cpp | 16 +- sycl/source/detail/helpers.cpp | 17 +- sycl/source/detail/kernel_bundle_impl.hpp | 635 +++++------------- sycl/source/detail/kernel_impl.cpp | 23 +- sycl/source/detail/kernel_impl.hpp | 3 + .../program_manager/program_manager.cpp | 492 ++++++++------ .../program_manager/program_manager.hpp | 15 +- sycl/source/detail/queue_impl.cpp | 7 +- sycl/source/detail/scheduler/commands.cpp | 63 +- sycl/source/handler.cpp | 7 +- .../arg_mask/EliminatedArgMask.cpp | 8 +- 13 files changed, 1087 insertions(+), 774 deletions(-) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 2c876a570e3c6..dabe45e2850b2 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -299,7 +299,8 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, // symbols (e.g. when kernel_bundle is supposed to be joined with another). auto KernelIDs = std::make_shared>(); auto DevImgImpl = std::make_shared( - nullptr, TargetContext, Devices, State, KernelIDs, UrProgram); + nullptr, TargetContext, Devices, State, KernelIDs, UrProgram, + ImageOriginInterop); device_image_plain DevImg{DevImgImpl}; return std::make_shared(TargetContext, Devices, DevImg); diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index ac58b7b80f467..9266ec6748e8c 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -12,8 +12,12 @@ #include #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -38,6 +42,79 @@ template struct LessByHash { } }; +namespace syclex = sycl::ext::oneapi::experimental; + +using include_pairs_t = + std::vector>; + +// Bits representing the origin of a given image, i.e. regular offline SYCL +// compilation, interop, kernel_compiler online compilation, etc. +constexpr uint8_t ImageOriginSYCLOffline = 1; +constexpr uint8_t ImageOriginInterop = 1 << 1; +constexpr uint8_t ImageOriginKernelCompiler = 1 << 2; + +// Information unique to images compiled at runtime through the +// ext_oneapi_kernel_compiler extension. +struct KernelCompilerBinaryInfo { + KernelCompilerBinaryInfo(syclex::source_language Lang, + std::set &&KernelNames = {}, + include_pairs_t &&IncludePairsVec = {}) + : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, + MIncludePairs{std::move(IncludePairsVec)} {} + + KernelCompilerBinaryInfo( + syclex::source_language Lang, std::set &&KernelNames, + std::unordered_map &&MangledKernelNames, + std::string &&Prefix) + : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, + MMangledKernelNames{std::move(MangledKernelNames)}, + MPrefix{std::move(Prefix)} {} + + static std::optional + Merge(const std::vector *> + &RTCInfos) { + std::optional Result = std::nullopt; + for (const std::optional *RTCInfoPtr : RTCInfos) { + if (!RTCInfoPtr || !(*RTCInfoPtr)) + continue; + const std::optional &RTCInfo = *RTCInfoPtr; + + if (!Result) { + Result = RTCInfo; + continue; + } + + if (RTCInfo->MLanguage != Result->MLanguage) + throw sycl::exception(make_error_code(errc::invalid), + "Linking binaries with different source " + "languages is not currently supported."); + + if (!RTCInfo->MPrefix.empty() && !Result->MPrefix.empty() && + RTCInfo->MPrefix != Result->MPrefix) + throw sycl::exception(make_error_code(errc::invalid), + "Linking binaries with different kernel prefixes " + "is not currently supported."); + + for (const std::string &KernelName : RTCInfo->MKernelNames) + Result->MKernelNames.insert(KernelName); + + Result->MMangledKernelNames.insert(RTCInfo->MMangledKernelNames.begin(), + RTCInfo->MMangledKernelNames.end()); + + Result->MIncludePairs.insert(Result->MIncludePairs.end(), + RTCInfo->MIncludePairs.begin(), + RTCInfo->MIncludePairs.end()); + } + return Result; + } + + syclex::source_language MLanguage; + std::set MKernelNames; + std::unordered_map MMangledKernelNames; + std::string MPrefix; + include_pairs_t MIncludePairs; +}; + // The class is impl counterpart for sycl::device_image // It can represent a program in different states, kernel_id's it has and state // of specialization constants for it @@ -60,25 +137,100 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, std::shared_ptr> KernelIDs, - ur_program_handle_t Program) + ur_program_handle_t Program, + uint8_t Origins = ImageOriginSYCLOffline) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), - MSpecConstsDefValBlob(getSpecConstsDefValBlob()) { + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(Origins) { updateSpecConstSymMap(); } - device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, - std::vector Devices, bundle_state State, + device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, std::shared_ptr> KernelIDs, ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, - const std::vector &SpecConstsBlob) + const std::vector &SpecConstsBlob, + uint8_t Origins, + std::optional &&RTCInfo) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), - MSpecConstSymMap(SpecConstMap) {} + MSpecConstSymMap(SpecConstMap), MOrigins(Origins), + MRTCBinInfo(std::move(RTCInfo)) {} + + device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, + ur_program_handle_t Program, syclex::source_language Lang, + std::set &&KernelNames) + : MBinImage(BinImage), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(State), MProgram(Program), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) { + updateSpecConstSymMap(); + } + + device_image_impl( + const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, + std::shared_ptr> &&KernelIDs, + syclex::source_language Lang, std::set &&KernelNames, + std::unordered_map &&MangledKernelNames, + sycl_device_binary Binary, std::string &&Prefix) + : MBinImage(BinImage), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(State), MProgram(nullptr), + MKernelIDs(std::move(KernelIDs)), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames), + std::move(MangledKernelNames), + std::move(Prefix)}) { + updateSpecConstSymMap(); + } + + device_image_impl(const std::string &Src, context Context, + const std::vector &Devices, + syclex::source_language Lang, + include_pairs_t &&IncludePairsVec) + : MBinImage(Src), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), + MProgram(nullptr), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::set{}, + std::move(IncludePairsVec)}) { + updateSpecConstSymMap(); + } + + device_image_impl(const std::vector &Bytes, const context &Context, + const std::vector &Devices, + syclex::source_language Lang) + : MBinImage(Bytes), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), + MProgram(nullptr), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) { + updateSpecConstSymMap(); + } + + device_image_impl(const context &Context, const std::vector &Devices, + bundle_state State, ur_program_handle_t Program, + syclex::source_language Lang, + std::set &&KernelNames) + : MBinImage(static_cast(nullptr)), + MContext(std::move(Context)), MDevices(std::move(Devices)), + MState(State), MProgram(Program), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) {} bool has_kernel(const kernel_id &KernelIDCand) const noexcept { return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(), @@ -127,7 +279,7 @@ class device_image_impl { // function is make_kernel(), but I'm not sure if it's even possible to // use spec constant with such kernel. So, in such case we need to check // if it's JIT or no somehow. - assert(MBinImage && + assert(hasRTDeviceBinaryImage() && "native_specialization_constant() called for unimplemented case"); auto IsJITSPIRVTarget = [](const char *Target) { @@ -136,7 +288,7 @@ class device_image_impl { }; return (MContext.get_backend() == backend::opencl || MContext.get_backend() == backend::ext_oneapi_level_zero) && - IsJITSPIRVTarget(MBinImage->getRawData().DeviceTargetSpec); + IsJITSPIRVTarget(get_bin_image_ref()->getRawData().DeviceTargetSpec); } bool has_specialization_constant(const char *SpecName) const noexcept { @@ -231,7 +383,7 @@ class device_image_impl { bool specialization_constants_replaced_with_default() const noexcept { sycl_device_binary_property Prop = - MBinImage->getProperty("specConstsReplacedWithDefault"); + get_bin_image_ref()->getProperty("specConstsReplacedWithDefault"); return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0); } @@ -251,7 +403,9 @@ class device_image_impl { return MProgram; } - const RTDeviceBinaryImage *&get_bin_image_ref() noexcept { return MBinImage; } + const RTDeviceBinaryImage *const &get_bin_image_ref() const noexcept { + return std::get(MBinImage); + } const context &get_context() const noexcept { return MContext; } @@ -325,16 +479,393 @@ class device_image_impl { } } + std::string adjustKernelName(const std::string &Name) const { + if (!MRTCBinInfo.has_value()) + return Name; + + switch (MRTCBinInfo->MLanguage) { + case syclex::source_language::sycl_jit: { + auto It = MRTCBinInfo->MMangledKernelNames.find(Name); + if (It != MRTCBinInfo->MMangledKernelNames.end()) + return It->second; + break; + } + case syclex::source_language::sycl: + if (Name.find("__sycl_kernel_") == std::string::npos) + return "__sycl_kernel_" + Name; + break; + default: + break; + } + return Name; + } + + bool hasKernelName(const std::string &Name) const { + return MRTCBinInfo.has_value() && + MRTCBinInfo->MKernelNames.find(adjustKernelName(Name)) != + MRTCBinInfo->MKernelNames.end(); + } + + std::shared_ptr tryGetSourceBasedKernel( + const std::string &Name, const context &Context, + const std::shared_ptr &OwnerBundle, + const std::shared_ptr &Self) const { + if (!(getOriginMask() & ImageOriginKernelCompiler)) + return nullptr; + + assert(MRTCBinInfo); + std::string AdjustedName = adjustKernelName(Name); + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl_jit) { + auto &PM = ProgramManager::getInstance(); + auto KID = PM.tryGetSYCLKernelID(MRTCBinInfo->MPrefix + AdjustedName); + + if (!KID || !has_kernel(*KID)) + return nullptr; + + auto UrProgram = get_ur_program_ref(); + auto [UrKernel, CacheMutex, ArgMask] = + PM.getOrCreateKernel(Context, AdjustedName, + /*PropList=*/{}, UrProgram); + return std::make_shared(UrKernel, getSyclObjImpl(Context), + Self, OwnerBundle, ArgMask, + UrProgram, CacheMutex); + } + + ur_program_handle_t UrProgram = get_ur_program_ref(); + const AdapterPtr &Adapter = getSyclObjImpl(Context)->getAdapter(); + ur_kernel_handle_t UrKernel = nullptr; + Adapter->call(UrProgram, AdjustedName.c_str(), + &UrKernel); + // Kernel created by urKernelCreate is implicitly retained. + + return std::make_shared( + UrKernel, detail::getSyclObjImpl(Context), Self, OwnerBundle, + /*ArgMask=*/nullptr, UrProgram, /*CacheMutex=*/nullptr); + } + + uint8_t getOriginMask() const noexcept { return MOrigins; } + + const std::optional &getRTCInfo() const noexcept { + return MRTCBinInfo; + } + + bool isNonSYCLJITSourceBased() const noexcept { + return (getOriginMask() & ImageOriginKernelCompiler) && + !isFromSourceLanguage(syclex::source_language::sycl_jit); + } + + bool isFromSourceLanguage(syclex::source_language Lang) const noexcept { + return MRTCBinInfo && MRTCBinInfo->MLanguage == Lang; + } + + std::vector> + buildFromSource(const std::vector Devices, + const std::vector &BuildOptions, + std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector &OutDeviceBinaries) const { + assert(!std::holds_alternative(MBinImage)); + assert(MRTCBinInfo); + assert(MOrigins & ImageOriginKernelCompiler); + + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl_jit) { + assert(std::holds_alternative(MBinImage)); + + // Build device images via the program manager. + const std::string &SourceStr = std::get(MBinImage); + std::ostringstream SourceExt; + if (!RegisteredKernelNames.empty()) { + SourceExt << SourceStr << '\n'; + + auto EmitEntry = + [&SourceExt](const std::string &Name) -> std::ostringstream & { + SourceExt << " {\"" << Name << "\", " << Name << "}"; + return SourceExt; + }; + + SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; + for (auto It = RegisteredKernelNames.begin(), + SecondToLast = RegisteredKernelNames.end() - 1; + It != SecondToLast; ++It) { + EmitEntry(*It) << ",\n"; + } + EmitEntry(RegisteredKernelNames.back()) << "\n"; + SourceExt << ")]];\n"; + } + + auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_to_SPIRV( + RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), + MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr); + + auto &PM = detail::ProgramManager::getInstance(); + std::vector> Result; + Result.reserve(Binaries->NumDeviceBinaries); + for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { + sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); + + RTDeviceBinaryImage *NewImage = nullptr; + auto KernelIDs = std::make_shared>(); + PM.addImage(Binary, &NewImage, KernelIDs.get()); + + // If the image is empty, we can skip it. + if (!NewImage) + continue; + + std::set KernelNames; + std::unordered_map MangledKernelNames; + for (const auto &KernelID : *KernelIDs) { + std::string_view KernelName{KernelID.get_name()}; + if (KernelName.find(Prefix) == 0) { + KernelName.remove_prefix(Prefix.length()); + KernelNames.emplace(KernelName); + static constexpr std::string_view SYCLKernelMarker{ + "__sycl_kernel_"}; + if (KernelName.find(SYCLKernelMarker) == 0) { + // extern "C" declaration, implicitly register kernel without the + // marker. + std::string_view KernelNameWithoutMarker{KernelName}; + KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); + MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); + } + } + + // Apply frontend information. + for (const sycl_device_binary_property &RKProp : + NewImage->getRegisteredKernels()) { + + auto BA = DeviceBinaryProperty(RKProp).asByteArray(); + auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; + std::string_view MangledName{ + reinterpret_cast(BA.begin()), MangledNameLen}; + MangledKernelNames.emplace(RKProp->Name, MangledName); + } + } + + // Mark the image as input so the program manager will bring it into the + // right state. + auto DevImgImpl = std::make_shared( + NewImage, MContext, Devices, bundle_state::input, + std::move(KernelIDs), MRTCBinInfo->MLanguage, + std::move(KernelNames), std::move(MangledKernelNames), Binary, + std::string{Prefix}); + + DevImgPlainWithDeps ImgWithDeps{DevImgImpl}; + PM.bringSYCLDeviceImageToState(ImgWithDeps, bundle_state::executable); + Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); + } + + OutDeviceBinaries.push_back(std::move(Binaries)); + return Result; + } + + std::vector DeviceVec; + DeviceVec.reserve(Devices.size()); + for (const auto &SyclDev : Devices) { + ur_device_handle_t Dev = getSyclObjImpl(SyclDev)->getHandleRef(); + DeviceVec.push_back(Dev); + } + + ur_program_handle_t UrProgram = nullptr; + // SourceStrPtr will be null when source is Spir-V bytes. + const std::string *SourceStrPtr = std::get_if(&MBinImage); + bool FetchedFromCache = false; + if (PersistentDeviceCodeCache::isEnabled() && SourceStrPtr) { + FetchedFromCache = extKernelCompilerFetchFromCache( + Devices, BuildOptions, *SourceStrPtr, UrProgram); + } + + std::shared_ptr ContextImpl = + getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + + if (!FetchedFromCache) { + const auto spirv = [&]() -> std::vector { + switch (MRTCBinInfo->MLanguage) { + case syclex::source_language::opencl: { + // if successful, the log is empty. if failed, throws an error with + // the compilation log. + std::vector IPVersionVec(Devices.size()); + std::transform(DeviceVec.begin(), DeviceVec.end(), + IPVersionVec.begin(), [&](ur_device_handle_t d) { + uint32_t ipVersion = 0; + Adapter->call( + d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), + &ipVersion, nullptr); + return ipVersion; + }); + return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, + BuildOptions, LogPtr); + } + case syclex::source_language::spirv: { + const auto &SourceBytes = std::get>(MBinImage); + std::vector Result(SourceBytes.size()); + std::transform(SourceBytes.cbegin(), SourceBytes.cend(), + Result.begin(), + [](std::byte B) { return static_cast(B); }); + return Result; + } + case syclex::source_language::sycl: { + return syclex::detail::SYCL_to_SPIRV( + *SourceStrPtr, MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr, + RegisteredKernelNames); + } + default: + break; + } + throw sycl::exception( + make_error_code(errc::invalid), + "SYCL C++, OpenCL C and SPIR-V are the only supported " + "languages at this time"); + }(); + + Adapter->call( + ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, + &UrProgram); + // program created by urProgramCreateWithIL is implicitly retained. + if (UrProgram == nullptr) + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "urProgramCreateWithIL resulted in a null program handle."); + + } // if(!FetchedFromCache) + + std::string XsFlags = extractXsFlags(BuildOptions); + auto Res = Adapter->call_nocheck( + UrProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str()); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Adapter->call_nocheck( + ContextImpl->getHandleRef(), UrProgram, XsFlags.c_str()); + } + Adapter->checkUrResult(Res); + + // Get the number of kernels in the program. + size_t NumKernels; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, + nullptr); + + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + std::vector KernelNames = + detail::split_string(KernelNamesStr, ';'); + std::set KernelNameSet{KernelNames.begin(), KernelNames.end()}; + + // If caching enabled and kernel not fetched from cache, cache. + if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache && + SourceStrPtr) { + PersistentDeviceCodeCache::putCompiledKernelToDisc( + Devices, syclex::detail::userArgsAsString(BuildOptions), + *SourceStrPtr, UrProgram); + } + return std::vector>{ + std::make_shared( + MContext, Devices, bundle_state::executable, UrProgram, + MRTCBinInfo->MLanguage, std::move(KernelNameSet))}; + } + private: + bool hasRTDeviceBinaryImage() const noexcept { + return std::holds_alternative(MBinImage) && + get_bin_image_ref() != nullptr; + } + + static std::string trimXsFlags(std::string &str) { + // Trim first and last quote if they exist, but no others. + char EncounteredQuote = '\0'; + auto Start = std::find_if(str.begin(), str.end(), [&](char c) { + if (!EncounteredQuote && (c == '\'' || c == '"')) { + EncounteredQuote = c; + return false; + } + return !std::isspace(c); + }); + auto End = std::find_if(str.rbegin(), str.rend(), [&](char c) { + if (c == EncounteredQuote) { + EncounteredQuote = '\0'; + return false; + } + return !std::isspace(c); + }).base(); + if (Start != std::end(str) && End != std::begin(str) && Start < End) { + return std::string(Start, End); + } + + return ""; + } + + static std::string + extractXsFlags(const std::vector &BuildOptions) { + std::stringstream SS; + for (std::string Option : BuildOptions) { + auto Where = Option.find("-Xs"); + if (Where != std::string::npos) { + Where += 3; + std::string Flags = Option.substr(Where); + SS << trimXsFlags(Flags) << " "; + } + } + return SS.str(); + } + + bool + extKernelCompilerFetchFromCache(const std::vector Devices, + const std::vector &BuildOptions, + const std::string &SourceStr, + ur_program_handle_t &UrProgram) const { + std::shared_ptr ContextImpl = + getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + + std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions); + + std::vector DeviceHandles; + std::transform( + Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), + [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); + + std::vector Binaries; + std::vector Lengths; + std::vector> BinProgs = + PersistentDeviceCodeCache::getCompiledKernelFromDisc(Devices, UserArgs, + SourceStr); + if (BinProgs.empty()) { + return false; + } + for (auto &BinProg : BinProgs) { + Binaries.push_back((uint8_t *)(BinProg.data())); + Lengths.push_back(BinProg.size()); + } + + ur_program_properties_t Properties = {}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; + Properties.pNext = nullptr; + Properties.count = 0; + Properties.pMetadatas = nullptr; + + Adapter->call( + ContextImpl->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), + Lengths.data(), Binaries.data(), &Properties, &UrProgram); + + return true; + } + // Get the specialization constant default value blob. ByteArray getSpecConstsDefValBlob() const { - if (!MBinImage) + if (!hasRTDeviceBinaryImage()) return ByteArray(nullptr, 0); // Get default values for specialization constants. const RTDeviceBinaryImage::PropertyRange &SCDefValRange = - MBinImage->getSpecConstantsDefaultValues(); - if (!SCDefValRange.size()) + get_bin_image_ref()->getSpecConstantsDefaultValues(); + if (!SCDefValRange.isAvailable()) return ByteArray(nullptr, 0); ByteArray DefValDescriptors = @@ -345,9 +876,9 @@ class device_image_impl { } void updateSpecConstSymMap() { - if (MBinImage) { + if (hasRTDeviceBinaryImage()) { const RTDeviceBinaryImage::PropertyRange &SCRange = - MBinImage->getSpecConstants(); + get_bin_image_ref()->getSpecConstants(); using SCItTy = RTDeviceBinaryImage::PropertyRange::ConstIterator; // This variable is used to calculate spec constant value offset in a @@ -400,7 +931,9 @@ class device_image_impl { } } - const RTDeviceBinaryImage *MBinImage = nullptr; + const std::variant, + const RTDeviceBinaryImage *> + MBinImage = static_cast(nullptr); context MContext; std::vector MDevices; bundle_state MState; @@ -427,6 +960,13 @@ class device_image_impl { // Contains map of spec const names to their descriptions + offsets in // the MSpecConstsBlob std::map> MSpecConstSymMap; + + // MOrigins is a bitfield to allow cases where the image is the product of + // merging images of different origins. + uint8_t MOrigins = ImageOriginSYCLOffline; + // Optional information about the binary produced by the kernel compiler + // extension. + std::optional MRTCBinInfo = std::nullopt; }; } // namespace detail diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index ea6901f93b264..9e731ba588da1 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1519,18 +1519,10 @@ void exec_graph_impl::populateURKernelUpdateStructs( std::shared_ptr SyclKernelImpl = nullptr; const sycl::detail::KernelArgMask *EliminatedArgMask = nullptr; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - auto KernelName = ExecCG.MKernelName; - kernel_id KernelID = - sycl::detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); + if (auto SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + ExecCG.MKernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index d8afc90b48d85..e2471b5dbd94b 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -72,17 +72,14 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, const RTDeviceBinaryImage *DeviceImage = nullptr; ur_program_handle_t Program = nullptr; - if (KernelCG->getKernelBundle() != nullptr) { + auto KernelBundleImpl = KernelCG->getKernelBundle(); + if (auto SyclKernelImpl = + KernelBundleImpl + ? KernelBundleImpl->tryGetKernel(KernelName, KernelBundleImpl) + : std::shared_ptr{nullptr}) { // Retrieve the device image from the kernel bundle. - auto KernelBundle = KernelCG->getKernelBundle(); - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - - auto SyclKernel = detail::getSyclObjImpl( - KernelBundle->get_kernel(KernelID, KernelBundle)); - - DeviceImage = SyclKernel->getDeviceImage()->get_bin_image_ref(); - Program = SyclKernel->getDeviceImage()->get_ur_program_ref(); + DeviceImage = SyclKernelImpl->getDeviceImage()->get_bin_image_ref(); + Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else if (KernelCG->MSyclKernel != nullptr) { DeviceImage = KernelCG->MSyclKernel->getDeviceImage()->get_bin_image_ref(); Program = KernelCG->MSyclKernel->getDeviceImage()->get_ur_program_ref(); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 1bbfb4d96f46d..d7bf8db57c627 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -9,10 +9,7 @@ #pragma once #include -#include -#include #include -#include #include #include #include @@ -97,7 +94,6 @@ class kernel_bundle_impl { make_error_code(errc::invalid), "Not all devices are associated with the context or " "vector of devices is empty"); - MIsInterop = true; } // Interop constructor @@ -346,138 +342,45 @@ class kernel_bundle_impl { } } - using include_pairs_t = - std::vector>; // oneapi_ext_kernel_compiler // construct from source string kernel_bundle_impl(const context &Context, syclex::source_language Lang, const std::string &Src, include_pairs_t IncludePairsVec) : MContext(Context), MDevices(Context.get_devices()), - MState(bundle_state::ext_oneapi_source), MLanguage(Lang), MSource(Src), - MIncludePairs(IncludePairsVec) {} + MDeviceImages{device_image_plain{std::make_shared( + Src, MContext, MDevices, Lang, std::move(IncludePairsVec))}}, + MUniqueDeviceImages{MDeviceImages[0].getMain()}, + MState(bundle_state::ext_oneapi_source) { + common_ctor_checks(); + } // oneapi_ext_kernel_compiler // construct from source bytes kernel_bundle_impl(const context &Context, syclex::source_language Lang, const std::vector &Bytes) : MContext(Context), MDevices(Context.get_devices()), - MState(bundle_state::ext_oneapi_source), MLanguage(Lang), - MSource(Bytes) {} - - // oneapi_ext_kernel_compiler - // interop constructor - kernel_bundle_impl(context Ctx, std::vector Devs, - device_image_plain &DevImage, - std::vector KNames, - syclex::source_language Lang) - : kernel_bundle_impl(Ctx, Devs, DevImage) { - MState = bundle_state::executable; - MKernelNames = std::move(KNames); - MLanguage = Lang; + MDeviceImages{device_image_plain{std::make_shared( + Bytes, MContext, MDevices, Lang)}}, + MUniqueDeviceImages{MDeviceImages[0].getMain()}, + MState(bundle_state::ext_oneapi_source) { + common_ctor_checks(); } // oneapi_ext_kernel_compiler - // program manager integration, only for sycl_jit language - kernel_bundle_impl( - context Ctx, std::vector Devs, - const std::vector &KernelIDs, - std::vector &&KernelNames, - std::unordered_map &&MangledKernelNames, - sycl_device_binaries Binaries, std::string &&Prefix, - syclex::source_language Lang) - : kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs, - bundle_state::executable) { - assert(Lang == syclex::source_language::sycl_jit); - // Mark this bundle explicitly as "interop" to ensure that its kernels are - // enqueued with the info from the kernel object passed by the application, - // cf. `enqueueImpKernel` in `commands.cpp`. While runtime-compiled kernels - // loaded via the program manager have `kernel_id`s, they can't be looked up - // from the (unprefixed) kernel name. - MIsInterop = true; - MKernelNames = std::move(KernelNames); - MMangledKernelNames = std::move(MangledKernelNames); - MDeviceBinaries = Binaries; - MPrefix = std::move(Prefix); - MLanguage = Lang; - } - - std::string trimXsFlags(std::string &str) { - // Trim first and last quote if they exist, but no others. - char EncounteredQuote = '\0'; - auto Start = std::find_if(str.begin(), str.end(), [&](char c) { - if (!EncounteredQuote && (c == '\'' || c == '"')) { - EncounteredQuote = c; - return false; - } - return !std::isspace(c); - }); - auto End = std::find_if(str.rbegin(), str.rend(), [&](char c) { - if (c == EncounteredQuote) { - EncounteredQuote = '\0'; - return false; - } - return !std::isspace(c); - }).base(); - if (Start != std::end(str) && End != std::begin(str) && Start < End) { - return std::string(Start, End); - } - - return ""; - } - - std::string extractXsFlags(const std::vector &BuildOptions) { - std::stringstream SS; - for (std::string Option : BuildOptions) { - auto Where = Option.find("-Xs"); - if (Where != std::string::npos) { - Where += 3; - std::string Flags = Option.substr(Where); - SS << trimXsFlags(Flags) << " "; - } - } - return SS.str(); - } - - bool - extKernelCompilerFetchFromCache(const std::vector Devices, - const std::vector &BuildOptions, - const std::string &SourceStr, - ur_program_handle_t &UrProgram) { - using ContextImplPtr = std::shared_ptr; - ContextImplPtr ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - - std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions); - - std::vector DeviceHandles; - std::transform( - Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), - [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); - - std::vector Binaries; - std::vector Lengths; - std::vector> BinProgs = - PersistentDeviceCodeCache::getCompiledKernelFromDisc(Devices, UserArgs, - SourceStr); - if (BinProgs.empty()) { - return false; - } - for (auto &BinProg : BinProgs) { - Binaries.push_back((uint8_t *)(BinProg.data())); - Lengths.push_back(BinProg.size()); - } - - ur_program_properties_t Properties = {}; - Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; - Properties.pNext = nullptr; - Properties.count = 0; - Properties.pMetadatas = nullptr; - - Adapter->call( - ContextImpl->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), - Lengths.data(), Binaries.data(), &Properties, &UrProgram); + // construct from built source files + kernel_bundle_impl(const context &Context, const std::vector &Devs, + std::vector &&DevImgs, + std::vector &&DevBinaries) + : MContext(Context), MDevices(Devs), + MUniqueDeviceImages(std::move(DevImgs)), + MState(bundle_state::executable), + MDeviceBinaries(std::move(DevBinaries)) { + common_ctor_checks(); - return true; + removeDuplicateImages(); + MDeviceImages.reserve(MUniqueDeviceImages.size()); + for (const device_image_plain &DevImg : MUniqueDeviceImages) + MDeviceImages.emplace_back(DevImg); } std::shared_ptr @@ -487,281 +390,73 @@ class kernel_bundle_impl { const std::vector &RegisteredKernelNames) { assert(MState == bundle_state::ext_oneapi_source && "bundle_state::ext_oneapi_source required"); - - using ContextImplPtr = std::shared_ptr; - ContextImplPtr ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - - std::vector DeviceVec; - DeviceVec.reserve(Devices.size()); - for (const auto &SyclDev : Devices) { - ur_device_handle_t Dev = getSyclObjImpl(SyclDev)->getHandleRef(); - DeviceVec.push_back(Dev); - } - - if (MLanguage == syclex::source_language::sycl_jit) { - // Build device images via the program manager. - const std::string &SourceStr = std::get(MSource); - std::ostringstream SourceExt; - if (!RegisteredKernelNames.empty()) { - SourceExt << SourceStr << '\n'; - - auto EmitEntry = - [&SourceExt](const std::string &Name) -> std::ostringstream & { - SourceExt << " {\"" << Name << "\", " << Name << "}"; - return SourceExt; - }; - - SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; - for (auto It = RegisteredKernelNames.begin(), - SecondToLast = RegisteredKernelNames.end() - 1; - It != SecondToLast; ++It) { - EmitEntry(*It) << ",\n"; - } - EmitEntry(RegisteredKernelNames.back()) << "\n"; - SourceExt << ")]];\n"; - } - - auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_to_SPIRV( - RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MIncludePairs, BuildOptions, LogPtr); - - auto &PM = detail::ProgramManager::getInstance(); - PM.addImages(Binaries); - - std::vector KernelIDs; - std::vector KernelNames; - std::unordered_map MangledKernelNames; - for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { - std::string_view KernelName{KernelID.get_name()}; - if (KernelName.find(Prefix) == 0) { - KernelIDs.push_back(KernelID); - KernelName.remove_prefix(Prefix.length()); - KernelNames.emplace_back(KernelName); - static constexpr std::string_view SYCLKernelMarker{"__sycl_kernel_"}; - if (KernelName.find(SYCLKernelMarker) == 0) { - // extern "C" declaration, implicitly register kernel without the - // marker. - std::string_view KernelNameWithoutMarker{KernelName}; - KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); - MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); - } - } - } - - // Apply frontend information. - for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) { - for (const sycl_device_binary_property &RKProp : - RawImg->getRegisteredKernels()) { - - auto BA = DeviceBinaryProperty(RKProp).asByteArray(); - auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; - std::string_view MangledName{ - reinterpret_cast(BA.begin()), MangledNameLen}; - MangledKernelNames.emplace(RKProp->Name, MangledName); - } - } - - return std::make_shared( - MContext, MDevices, KernelIDs, std::move(KernelNames), - std::move(MangledKernelNames), Binaries, std::move(Prefix), - MLanguage); - } - - ur_program_handle_t UrProgram = nullptr; - // SourceStrPtr will be null when source is Spir-V bytes. - const std::string *SourceStrPtr = std::get_if(&MSource); - bool FetchedFromCache = false; - if (PersistentDeviceCodeCache::isEnabled() && SourceStrPtr) { - FetchedFromCache = extKernelCompilerFetchFromCache( - Devices, BuildOptions, *SourceStrPtr, UrProgram); - } - - if (!FetchedFromCache) { - const auto spirv = [&]() -> std::vector { - if (MLanguage == syclex::source_language::opencl) { - // if successful, the log is empty. if failed, throws an error with - // the compilation log. - std::vector IPVersionVec(Devices.size()); - std::transform(DeviceVec.begin(), DeviceVec.end(), - IPVersionVec.begin(), [&](ur_device_handle_t d) { - uint32_t ipVersion = 0; - Adapter->call( - d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - return ipVersion; - }); - return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, - BuildOptions, LogPtr); - } - if (MLanguage == syclex::source_language::spirv) { - const auto &SourceBytes = std::get>(MSource); - std::vector Result(SourceBytes.size()); - std::transform(SourceBytes.cbegin(), SourceBytes.cend(), - Result.begin(), - [](std::byte B) { return static_cast(B); }); - return Result; - } - if (MLanguage == syclex::source_language::sycl) { - return syclex::detail::SYCL_to_SPIRV(*SourceStrPtr, MIncludePairs, - BuildOptions, LogPtr, - RegisteredKernelNames); - } - throw sycl::exception( - make_error_code(errc::invalid), - "SYCL C++, OpenCL C and SPIR-V are the only supported " - "languages at this time"); - }(); - - Adapter->call( - ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, - &UrProgram); - // program created by urProgramCreateWithIL is implicitly retained. - if (UrProgram == nullptr) - throw sycl::exception( - sycl::make_error_code(errc::invalid), - "urProgramCreateWithIL resulted in a null program handle."); - - } // if(!FetchedFromCache) - - std::string XsFlags = extractXsFlags(BuildOptions); - auto Res = Adapter->call_nocheck( - UrProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str()); - if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - Res = Adapter->call_nocheck( - ContextImpl->getHandleRef(), UrProgram, XsFlags.c_str()); + assert(allSourceBasedImages() && "All images must be source-based."); + + std::vector NewDevImgs; + std::vector NewDeviceBinaries; + for (device_image_plain &DevImg : MUniqueDeviceImages) { + std::vector> NewDevImgImpls = + getSyclObjImpl(DevImg)->buildFromSource(Devices, BuildOptions, LogPtr, + RegisteredKernelNames, + NewDeviceBinaries); + NewDevImgs.reserve(NewDevImgImpls.size()); + for (std::shared_ptr &DevImgImpl : NewDevImgImpls) + NewDevImgs.emplace_back(std::move(DevImgImpl)); } - Adapter->checkUrResult(Res); - - // Get the number of kernels in the program. - size_t NumKernels; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, - nullptr); - - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); - std::vector KernelNames = - detail::split_string(KernelNamesStr, ';'); - - // make the device image and the kernel_bundle_impl - auto KernelIDs = std::make_shared>(); - auto DevImgImpl = std::make_shared( - nullptr, MContext, MDevices, bundle_state::executable, KernelIDs, - UrProgram); - device_image_plain DevImg{DevImgImpl}; - - // If caching enabled and kernel not fetched from cache, cache. - if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache && - SourceStrPtr) { - PersistentDeviceCodeCache::putCompiledKernelToDisc( - Devices, syclex::detail::userArgsAsString(BuildOptions), - *SourceStrPtr, UrProgram); - } - - return std::make_shared(MContext, MDevices, DevImg, - KernelNames, MLanguage); - } - - std::string adjust_kernel_name(const std::string &Name) { - if (MLanguage == syclex::source_language::sycl_jit) { - auto It = MMangledKernelNames.find(Name); - return It == MMangledKernelNames.end() ? Name : It->second; - } - - if (MLanguage == syclex::source_language::sycl) { - bool isMangled = Name.find("__sycl_kernel_") != std::string::npos; - return isMangled ? Name : "__sycl_kernel_" + Name; - } - - return Name; - } - - bool is_kernel_name(const std::string &Name) { - return std::find(MKernelNames.begin(), MKernelNames.end(), Name) != - MKernelNames.end(); + return std::make_shared( + MContext, Devices, std::move(NewDevImgs), std::move(NewDeviceBinaries)); } bool ext_oneapi_has_kernel(const std::string &Name) { - return is_kernel_name(adjust_kernel_name(Name)); + return std::any_of(begin(), end(), + [&Name](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->hasKernelName(Name); + }); } kernel ext_oneapi_get_kernel(const std::string &Name, const std::shared_ptr &Self) { - if (MKernelNames.empty()) + if (!hasSourceBasedImages()) throw sycl::exception(make_error_code(errc::invalid), "'ext_oneapi_get_kernel' is only available in " "kernel_bundles successfully built from " "kernel_bundle."); - std::string AdjustedName = adjust_kernel_name(Name); - if (!is_kernel_name(AdjustedName)) - throw sycl::exception(make_error_code(errc::invalid), - "kernel '" + Name + "' not found in kernel_bundle"); - - if (MLanguage == syclex::source_language::sycl_jit) { - auto &PM = ProgramManager::getInstance(); - auto KID = PM.getSYCLKernelID(MPrefix + AdjustedName); - - for (const auto &DevImgWithDeps : MDeviceImages) { - const auto &DevImg = DevImgWithDeps.getMain(); - if (!DevImg.has_kernel(KID)) - continue; - - const auto &DevImgImpl = getSyclObjImpl(DevImg); - auto UrProgram = DevImgImpl->get_ur_program_ref(); - auto [UrKernel, CacheMutex, ArgMask] = - PM.getOrCreateKernel(MContext, AdjustedName, - /*PropList=*/{}, UrProgram); - auto KernelImpl = std::make_shared( - UrKernel, getSyclObjImpl(MContext), DevImgImpl, Self, ArgMask, - UrProgram, CacheMutex); - return createSyclObjFromImpl(KernelImpl); - } - - assert(false && "Malformed RTC kernel bundle"); + // TODO: When linking is properly implemented for kernel compiler binaries, + // there can be scenarios where multiple binaries have the same + // kernels. In this case, all these bundles should be found and the + // resulting kernel object should be able to map devices to their + // respective backend kernel objects. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const std::shared_ptr &DevImgImpl = + getSyclObjImpl(DevImg); + if (std::shared_ptr PotentialKernelImpl = + DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, + DevImgImpl)) + return detail::createSyclObjFromImpl(PotentialKernelImpl); } - - assert(MDeviceImages.size() > 0); - const std::shared_ptr &DeviceImageImpl = - detail::getSyclObjImpl(MDeviceImages[0].getMain()); - ur_program_handle_t UrProgram = DeviceImageImpl->get_ur_program_ref(); - ContextImplPtr ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - ur_kernel_handle_t UrKernel = nullptr; - Adapter->call(UrProgram, AdjustedName.c_str(), - &UrKernel); - // Kernel created by urKernelCreate is implicitly retained. - - std::shared_ptr KernelImpl = std::make_shared( - UrKernel, detail::getSyclObjImpl(MContext), Self); - - return detail::createSyclObjFromImpl(KernelImpl); + throw sycl::exception(make_error_code(errc::invalid), + "kernel '" + Name + "' not found in kernel_bundle"); } std::string ext_oneapi_get_raw_kernel_name(const std::string &Name) { - if (MKernelNames.empty()) + if (!hasSourceBasedImages()) throw sycl::exception( make_error_code(errc::invalid), "'ext_oneapi_get_raw_kernel_name' is only available in " "kernel_bundles successfully built from " "kernel_bundle."); - std::string AdjustedName = adjust_kernel_name(Name); - if (!is_kernel_name(AdjustedName)) + auto It = + std::find_if(begin(), end(), [&Name](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->hasKernelName(Name); + }); + if (It == end()) throw sycl::exception(make_error_code(errc::invalid), "kernel '" + Name + "' not found in kernel_bundle"); - return AdjustedName; + return getSyclObjImpl(*It)->adjustKernelName(Name); } bool empty() const noexcept { return MDeviceImages.empty(); } @@ -795,71 +490,12 @@ class kernel_bundle_impl { kernel get_kernel(const kernel_id &KernelID, const std::shared_ptr &Self) const { - using ImageImpl = std::shared_ptr; - // Selected image. - ImageImpl SelectedImage = nullptr; - // Image where specialization constants are replaced with default values. - ImageImpl ImageWithReplacedSpecConsts = nullptr; - // Original image where specialization constants are not replaced with - // default values. - ImageImpl OriginalImage = nullptr; - // Used to track if any of the candidate images has specialization values - // set. - bool SpecConstsSet = false; - for (const DevImgPlainWithDeps &DeviceImageWithDeps : MDeviceImages) { - const device_image_plain &DeviceImage = DeviceImageWithDeps.getMain(); - if (!DeviceImageWithDeps.getMain().has_kernel(KernelID)) - continue; - - const auto DeviceImageImpl = detail::getSyclObjImpl(DeviceImage); - SpecConstsSet |= DeviceImageImpl->is_any_specialization_constant_set(); - - // Remember current image in corresponding variable depending on whether - // specialization constants are replaced with default value or not. - (DeviceImageImpl->specialization_constants_replaced_with_default() - ? ImageWithReplacedSpecConsts - : OriginalImage) = DeviceImageImpl; - - if (SpecConstsSet) { - // If specialization constant is set in any of the candidate images - // then we can't use ReplacedImage, so we select NativeImage if any or - // we select OriginalImage and keep iterating in case there is an image - // with native support. - SelectedImage = OriginalImage; - if (SelectedImage && - SelectedImage->all_specialization_constant_native()) - break; - } else { - // For now select ReplacedImage but it may be reset if any of the - // further device images has specialization constant value set. If after - // all iterations specialization constant values are not set in any of - // the candidate images then that will be the selected image. - // Also we don't want to use ReplacedImage if device image has native - // support. - if (ImageWithReplacedSpecConsts && - !ImageWithReplacedSpecConsts->all_specialization_constant_native()) - SelectedImage = ImageWithReplacedSpecConsts; - else - // In case if we don't have or don't use ReplacedImage. - SelectedImage = OriginalImage; - } - } - - if (!SelectedImage) - throw sycl::exception(make_error_code(errc::invalid), - "The kernel bundle does not contain the kernel " - "identified by kernelId."); - - auto [Kernel, CacheMutex, ArgMask] = - detail::ProgramManager::getInstance().getOrCreateKernel( - MContext, KernelID.get_name(), /*PropList=*/{}, - SelectedImage->get_ur_program_ref()); - - std::shared_ptr KernelImpl = std::make_shared( - Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, - SelectedImage->get_ur_program_ref(), CacheMutex); - - return detail::createSyclObjFromImpl(KernelImpl); + if (std::shared_ptr KernelImpl = + tryGetOfflineKernel(KernelID, Self)) + return detail::createSyclObjFromImpl(KernelImpl); + throw sycl::exception(make_error_code(errc::invalid), + "The kernel bundle does not contain the kernel " + "identified by kernelId."); } bool has_kernel(const kernel_id &KernelID) const noexcept { @@ -961,8 +597,6 @@ class kernel_bundle_impl { return MSpecConstValues; } - bool isInterop() const { return MIsInterop; } - bool add_kernel(const kernel_id &KernelID, const device &Dev) { // Skip if kernel is already there if (has_kernel(KernelID, Dev)) @@ -994,15 +628,124 @@ class kernel_bundle_impl { ~kernel_bundle_impl() { try { - if (MDeviceBinaries) { - ProgramManager::getInstance().removeImages(MDeviceBinaries); - syclex::detail::SYCL_JIT_destroy(MDeviceBinaries); + for (sycl_device_binaries &Binaries : MDeviceBinaries) { + ProgramManager::getInstance().removeImages(Binaries); + syclex::detail::SYCL_JIT_destroy(Binaries); } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_bundle_impl", e); } } + bool hasSourceBasedImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & + ImageOriginKernelCompiler; + }); + } + + bool hasSYCLOfflineImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLOffline; + }); + } + + bool allSourceBasedImages() const noexcept { + return std::all_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & + ImageOriginKernelCompiler; + }); + } + + std::shared_ptr tryGetOfflineKernel( + const kernel_id &KernelID, + const std::shared_ptr &Self) const { + using ImageImpl = std::shared_ptr; + // Selected image. + ImageImpl SelectedImage = nullptr; + // Image where specialization constants are replaced with default values. + ImageImpl ImageWithReplacedSpecConsts = nullptr; + // Original image where specialization constants are not replaced with + // default values. + ImageImpl OriginalImage = nullptr; + // Used to track if any of the candidate images has specialization values + // set. + bool SpecConstsSet = false; + for (const DevImgPlainWithDeps &DeviceImageWithDeps : MDeviceImages) { + const device_image_plain &DeviceImage = DeviceImageWithDeps.getMain(); + if (!DeviceImageWithDeps.getMain().has_kernel(KernelID)) + continue; + + const auto DeviceImageImpl = detail::getSyclObjImpl(DeviceImage); + SpecConstsSet |= DeviceImageImpl->is_any_specialization_constant_set(); + + // Remember current image in corresponding variable depending on whether + // specialization constants are replaced with default value or not. + (DeviceImageImpl->specialization_constants_replaced_with_default() + ? ImageWithReplacedSpecConsts + : OriginalImage) = DeviceImageImpl; + + if (SpecConstsSet) { + // If specialization constant is set in any of the candidate images + // then we can't use ReplacedImage, so we select NativeImage if any or + // we select OriginalImage and keep iterating in case there is an image + // with native support. + SelectedImage = OriginalImage; + if (SelectedImage && + SelectedImage->all_specialization_constant_native()) + break; + } else { + // For now select ReplacedImage but it may be reset if any of the + // further device images has specialization constant value set. If after + // all iterations specialization constant values are not set in any of + // the candidate images then that will be the selected image. + // Also we don't want to use ReplacedImage if device image has native + // support. + if (ImageWithReplacedSpecConsts && + !ImageWithReplacedSpecConsts->all_specialization_constant_native()) + SelectedImage = ImageWithReplacedSpecConsts; + else + // In case if we don't have or don't use ReplacedImage. + SelectedImage = OriginalImage; + } + } + + if (!SelectedImage) + return nullptr; + + auto [Kernel, CacheMutex, ArgMask] = + detail::ProgramManager::getInstance().getOrCreateKernel( + MContext, KernelID.get_name(), /*PropList=*/{}, + SelectedImage->get_ur_program_ref()); + + return std::make_shared( + Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, + SelectedImage->get_ur_program_ref(), CacheMutex); + } + + std::shared_ptr + tryGetKernel(const std::string &Name, + const std::shared_ptr &Self) const { + // TODO: For source-based kernels, it may be faster to keep a map between + // {kernel_name, device} and their corresponding image. + // First look through the kernels registered in source-based images. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const std::shared_ptr &DevImgImpl = + getSyclObjImpl(DevImg); + if (std::shared_ptr SourceBasedKernel = + DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, + DevImgImpl)) + return SourceBasedKernel; + } + + // Fall back to regular offline compiled kernel_bundle look-up. + if (std::optional MaybeKernelID = + sycl::detail::ProgramManager::getInstance().tryGetSYCLKernelID( + Name)) + return tryGetOfflineKernel(*MaybeKernelID, Self); + return nullptr; + } + private: void fillUniqueDeviceImages() { assert(MUniqueDeviceImages.empty()); @@ -1025,19 +768,11 @@ class kernel_bundle_impl { // This map stores values for specialization constants, that are missing // from any device image. SpecConstMapT MSpecConstValues; - bool MIsInterop = false; bundle_state MState; - // ext_oneapi_kernel_compiler : Source, Languauge, KernelNames, IncludePairs - // Language is for both state::source and state::executable. - syclex::source_language MLanguage = syclex::source_language::opencl; - const std::variant> MSource; - // only kernel_bundles created from source have KernelNames member. - std::vector MKernelNames; - std::unordered_map MMangledKernelNames; - sycl_device_binaries MDeviceBinaries = nullptr; - std::string MPrefix; - include_pairs_t MIncludePairs; + // For sycl_jit, building from source may have produced sycl binaries that + // this kernel_bundle now manages. + std::vector MDeviceBinaries; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 3bbff52ae4f2b..b500f1fbff3ed 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -50,11 +50,11 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, const KernelArgMask *ArgMask, ur_program_handle_t Program, std::mutex *CacheMutex) : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgram(Program), - MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)), + MCreatedFromSource(DeviceImageImpl->isNonSYCLJITSourceBased()), + MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), - MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { - MIsInterop = MKernelBundleImpl->isInterop(); -} + MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), + MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} {} kernel_impl::~kernel_impl() { try { @@ -81,6 +81,19 @@ bool kernel_impl::isCreatedFromSource() const { return MCreatedFromSource; } +bool kernel_impl::isInteropOrSourceBased() const noexcept { + return isInterop() || + (MDeviceImageImpl && + (MDeviceImageImpl->getOriginMask() & ImageOriginKernelCompiler)); +} + +bool kernel_impl::hasSYCLMetadata() const noexcept { + return !isInteropOrSourceBased() || + (MDeviceImageImpl && + MDeviceImageImpl->isFromSourceLanguage( + sycl::ext::oneapi::experimental::source_language::sycl_jit)); +} + bool kernel_impl::isBuiltInKernel(const device &Device) const { auto BuiltInKernels = Device.get_info(); if (BuiltInKernels.empty()) @@ -92,7 +105,7 @@ bool kernel_impl::isBuiltInKernel(const device &Device) const { } void kernel_impl::checkIfValidForNumArgsInfoQuery() const { - if (MKernelBundleImpl->isInterop()) + if (isInteropOrSourceBased()) return; auto Devices = MKernelBundleImpl->get_devices(); if (std::any_of(Devices.begin(), Devices.end(), diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 570d912cdabb4..ec1500f7add09 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -207,6 +207,9 @@ class kernel_impl { /// \return true if kernel was created from source. bool isCreatedFromSource() const; + bool isInteropOrSourceBased() const noexcept; + bool hasSYCLMetadata() const noexcept; + const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; } ur_native_handle_t getNative() const { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8148b413f0ecc..93db57a261dbb 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1798,205 +1798,215 @@ ProgramManager::kernelImplicitLocalArgPos(const std::string &KernelName) const { return {}; } -void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { +void ProgramManager::addImage(sycl_device_binary RawImg, + RTDeviceBinaryImage **OutImage, + std::vector *OutKernelIDs) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; - for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { - sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); - const sycl_offload_entry EntriesB = RawImg->EntriesBegin; - const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - // Treat the image as empty one - if (EntriesB == EntriesE) - continue; + const sycl_offload_entry EntriesB = RawImg->EntriesBegin; + const sycl_offload_entry EntriesE = RawImg->EntriesEnd; + // Treat the image as empty one + if (EntriesB == EntriesE) + return; - std::unique_ptr Img; - if (isDeviceImageCompressed(RawImg)) + std::unique_ptr Img; + if (isDeviceImageCompressed(RawImg)) #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE - Img = std::make_unique(RawImg); + Img = std::make_unique(RawImg); #else - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Recieved a compressed device image, but " - "SYCL RT was built without ZSTD support." - "Aborting. "); + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Recieved a compressed device image, but " + "SYCL RT was built without ZSTD support." + "Aborting. "); #endif - else - Img = std::make_unique(RawImg); + else + Img = std::make_unique(RawImg); - static uint32_t SequenceID = 0; + // If an output image is requested, set it to the newly allocated image. + if (OutImage) + *OutImage = Img.get(); - // Fill the kernel argument mask map - const RTDeviceBinaryImage::PropertyRange &KPOIRange = - Img->getKernelParamOptInfo(); - if (KPOIRange.isAvailable()) { - KernelNameToArgMaskMap &ArgMaskMap = - m_EliminatedKernelArgMasks[Img.get()]; - for (const auto &Info : KPOIRange) - ArgMaskMap[Info->Name] = - createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); - } + static uint32_t SequenceID = 0; - // Fill maps for kernel bundles - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // Fill the kernel argument mask map + const RTDeviceBinaryImage::PropertyRange &KPOIRange = + Img->getKernelParamOptInfo(); + if (KPOIRange.isAvailable()) { + KernelNameToArgMaskMap &ArgMaskMap = m_EliminatedKernelArgMasks[Img.get()]; + for (const auto &Info : KPOIRange) + ArgMaskMap[Info->Name] = + createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); + } - // Register all exported symbols - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); - } + // Fill maps for kernel bundles + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - // Record mapping between virtual function sets and device images - for (const sycl_device_binary_property &VFProp : - Img->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); - for (const auto &SetName : detail::split_string(StrValue, ',')) - m_VFSet2BinImage[SetName].insert(Img.get()); - } + // Register all exported symbols + for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + } - if (DumpImages) { - const bool NeedsSequenceID = std::any_of( - m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(), - [&](auto &CurrentImg) { - return CurrentImg.first->getFormat() == Img->getFormat(); - }); + // Record mapping between virtual function sets and device images + for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { + std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + for (const auto &SetName : detail::split_string(StrValue, ',')) + m_VFSet2BinImage[SetName].insert(Img.get()); + } - // Check if image is compressed, and decompress it before dumping. - CheckAndDecompressImage(Img.get()); + if (DumpImages) { + const bool NeedsSequenceID = + std::any_of(m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(), + [&](auto &CurrentImg) { + return CurrentImg.first->getFormat() == Img->getFormat(); + }); - dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); - } + // Check if image is compressed, and decompress it before dumping. + CheckAndDecompressImage(Img.get()); - m_BinImg2KernelIDs[Img.get()].reset(new std::vector); + dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); + } - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { + std::shared_ptr> &KernelIDs = + m_BinImg2KernelIDs[Img.get()]; + KernelIDs.reset(new std::vector); - auto name = EntriesIt->GetName(); + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { - // Skip creating unique kernel ID if it is a service kernel. - // SYCL service kernels are identified by having - // __sycl_service_kernel__ in the mangled name, primarily as part of - // the namespace of the name type. - if (std::strstr(name, "__sycl_service_kernel__")) { - m_ServiceKernels.insert(std::make_pair(name, Img.get())); - continue; - } + auto name = EntriesIt->GetName(); - // Skip creating unique kernel ID if it is an exported device - // function. Exported device functions appear in the offload entries - // among kernels, but are identifiable by being listed in properties. - if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) - continue; + // Skip creating unique kernel ID if it is a service kernel. + // SYCL service kernels are identified by having + // __sycl_service_kernel__ in the mangled name, primarily as part of + // the namespace of the name type. + if (std::strstr(name, "__sycl_service_kernel__")) { + m_ServiceKernels.insert(std::make_pair(name, Img.get())); + continue; + } - // ... and create a unique kernel ID for the entry - auto It = m_KernelName2KernelIDs.find(name); - if (It == m_KernelName2KernelIDs.end()) { - std::shared_ptr KernelIDImpl = - std::make_shared(name); - sycl::kernel_id KernelID = - detail::createSyclObjFromImpl(KernelIDImpl); + // Skip creating unique kernel ID if it is an exported device + // function. Exported device functions appear in the offload entries + // among kernels, but are identifiable by being listed in properties. + if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) + continue; - It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID); - } - m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); - m_BinImg2KernelIDs[Img.get()]->push_back(It->second); + // ... and create a unique kernel ID for the entry + auto It = m_KernelName2KernelIDs.find(name); + if (It == m_KernelName2KernelIDs.end()) { + std::shared_ptr KernelIDImpl = + std::make_shared(name); + sycl::kernel_id KernelID = + detail::createSyclObjFromImpl(KernelIDImpl); + + It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID); } + m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); + KernelIDs->push_back(It->second); + } - cacheKernelUsesAssertInfo(*Img); + cacheKernelUsesAssertInfo(*Img); - // check if kernel uses sanitizer - { - sycl_device_binary_property SanProp = Img->getProperty("sanUsed"); - if (SanProp) { - std::string SanValue = - detail::DeviceBinaryProperty(SanProp).asCString(); - - if (SanValue.rfind("asan", 0) == 0) { // starts_with - m_SanitizerFoundInImage = SanitizerType::AddressSanitizer; - } else if (SanValue.rfind("msan", 0) == 0) { - m_SanitizerFoundInImage = SanitizerType::MemorySanitizer; - } else if (SanValue.rfind("tsan", 0) == 0) { - m_SanitizerFoundInImage = SanitizerType::ThreadSanitizer; - } + // check if kernel uses sanitizer + { + sycl_device_binary_property SanProp = Img->getProperty("sanUsed"); + if (SanProp) { + std::string SanValue = detail::DeviceBinaryProperty(SanProp).asCString(); + + if (SanValue.rfind("asan", 0) == 0) { // starts_with + m_SanitizerFoundInImage = SanitizerType::AddressSanitizer; + } else if (SanValue.rfind("msan", 0) == 0) { + m_SanitizerFoundInImage = SanitizerType::MemorySanitizer; + } else if (SanValue.rfind("tsan", 0) == 0) { + m_SanitizerFoundInImage = SanitizerType::ThreadSanitizer; } } + } - cacheKernelImplicitLocalArg(*Img); + cacheKernelImplicitLocalArg(*Img); - // Sort kernel ids for faster search - std::sort(m_BinImg2KernelIDs[Img.get()]->begin(), - m_BinImg2KernelIDs[Img.get()]->end(), LessByHash{}); + // Sort kernel ids for faster search + std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); - // ... and initialize associated device_global information - { - std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + // If requested, copy the new (sorted) kernel IDs. + if (OutKernelIDs) + OutKernelIDs->insert(OutKernelIDs->end(), KernelIDs->begin(), + KernelIDs->end()); - auto DeviceGlobals = Img->getDeviceGlobals(); - for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { - ByteArray DeviceGlobalInfo = - DeviceBinaryProperty(DeviceGlobal).asByteArray(); - - // The supplied device_global info property is expected to contain: - // * 8 bytes - Size of the property. - // * 4 bytes - Size of the underlying type in the device_global. - // * 4 bytes - 0 if device_global has device_image_scope and any value - // otherwise. - DeviceGlobalInfo.dropBytes(8); - auto [TypeSize, DeviceImageScopeDecorated] = - DeviceGlobalInfo.consume(); - assert(DeviceGlobalInfo.empty() && "Extra data left!"); - - // Give the image pointer as an identifier for the image the - // device-global is associated with. - - auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name); - if (ExistingDeviceGlobal != m_DeviceGlobals.end()) { - // If it has already been registered we update the information. - ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize, - DeviceImageScopeDecorated); - } else { - // If it has not already been registered we create a new entry. - // Note: Pointer to the device global is not available here, so it - // cannot be set until registration happens. - auto EntryUPtr = std::make_unique( - DeviceGlobal->Name, Img.get(), TypeSize, - DeviceImageScopeDecorated); - m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); - } + // ... and initialize associated device_global information + { + std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + + auto DeviceGlobals = Img->getDeviceGlobals(); + for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { + ByteArray DeviceGlobalInfo = + DeviceBinaryProperty(DeviceGlobal).asByteArray(); + + // The supplied device_global info property is expected to contain: + // * 8 bytes - Size of the property. + // * 4 bytes - Size of the underlying type in the device_global. + // * 4 bytes - 0 if device_global has device_image_scope and any value + // otherwise. + DeviceGlobalInfo.dropBytes(8); + auto [TypeSize, DeviceImageScopeDecorated] = + DeviceGlobalInfo.consume(); + assert(DeviceGlobalInfo.empty() && "Extra data left!"); + + // Give the image pointer as an identifier for the image the + // device-global is associated with. + + auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name); + if (ExistingDeviceGlobal != m_DeviceGlobals.end()) { + // If it has already been registered we update the information. + ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize, + DeviceImageScopeDecorated); + } else { + // If it has not already been registered we create a new entry. + // Note: Pointer to the device global is not available here, so it + // cannot be set until registration happens. + auto EntryUPtr = std::make_unique( + DeviceGlobal->Name, Img.get(), TypeSize, DeviceImageScopeDecorated); + m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); } } - // ... and initialize associated host_pipe information - { - std::lock_guard HostPipesGuard(m_HostPipesMutex); - auto HostPipes = Img->getHostPipes(); - for (const sycl_device_binary_property &HostPipe : HostPipes) { - ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray(); - - // The supplied host_pipe info property is expected to contain: - // * 8 bytes - Size of the property. - // * 4 bytes - Size of the underlying type in the host_pipe. - // Note: Property may be padded. - - HostPipeInfo.dropBytes(8); - auto TypeSize = HostPipeInfo.consume(); - assert(HostPipeInfo.empty() && "Extra data left!"); - - auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name); - if (ExistingHostPipe != m_HostPipes.end()) { - // If it has already been registered we update the information. - ExistingHostPipe->second->initialize(TypeSize); - ExistingHostPipe->second->initialize(Img.get()); - } else { - // If it has not already been registered we create a new entry. - // Note: Pointer to the host pipe is not available here, so it - // cannot be set until registration happens. - auto EntryUPtr = - std::make_unique(HostPipe->Name, TypeSize); - EntryUPtr->initialize(Img.get()); - m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr)); - } + } + // ... and initialize associated host_pipe information + { + std::lock_guard HostPipesGuard(m_HostPipesMutex); + auto HostPipes = Img->getHostPipes(); + for (const sycl_device_binary_property &HostPipe : HostPipes) { + ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray(); + + // The supplied host_pipe info property is expected to contain: + // * 8 bytes - Size of the property. + // * 4 bytes - Size of the underlying type in the host_pipe. + // Note: Property may be padded. + + HostPipeInfo.dropBytes(8); + auto TypeSize = HostPipeInfo.consume(); + assert(HostPipeInfo.empty() && "Extra data left!"); + + auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name); + if (ExistingHostPipe != m_HostPipes.end()) { + // If it has already been registered we update the information. + ExistingHostPipe->second->initialize(TypeSize); + ExistingHostPipe->second->initialize(Img.get()); + } else { + // If it has not already been registered we create a new entry. + // Note: Pointer to the host pipe is not available here, so it + // cannot be set until registration happens. + auto EntryUPtr = + std::make_unique(HostPipe->Name, TypeSize); + EntryUPtr->initialize(Img.get()); + m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr)); } } - m_DeviceImages.insert({RawImg, std::move(Img)}); } + + m_DeviceImages.insert({RawImg, std::move(Img)}); +} + +void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { + for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) + addImage(&(DeviceBinary->DeviceBinaries[I])); } void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { @@ -2219,17 +2229,24 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } -kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { +std::optional +ProgramManager::tryGetSYCLKernelID(const std::string &KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); auto KernelID = m_KernelName2KernelIDs.find(KernelName); if (KernelID == m_KernelName2KernelIDs.end()) - throw exception(make_error_code(errc::runtime), - "No kernel found with the specified name"); + return std::nullopt; return KernelID->second; } +kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { + if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) + return *MaybeKernelID; + throw exception(make_error_code(errc::runtime), + "No kernel found with the specified name"); +} + bool ProgramManager::hasCompatibleImage(const device &Dev) { std::lock_guard Guard(m_KernelIDsMutex); @@ -2554,67 +2571,69 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( return SYCLDeviceImages; } -void ProgramManager::bringSYCLDeviceImagesToState( - std::vector &DeviceImages, bundle_state TargetState) { - - for (DevImgPlainWithDeps &ImgWithDeps : DeviceImages) { - device_image_plain &MainImg = ImgWithDeps.getMain(); - const bundle_state DevImageState = getSyclObjImpl(MainImg)->get_state(); - // At this time, there is no circumstance where a device image should ever - // be in the source state. That not good. - assert(DevImageState != bundle_state::ext_oneapi_source); - - switch (TargetState) { +void ProgramManager::bringSYCLDeviceImageToState( + DevImgPlainWithDeps &DeviceImage, bundle_state TargetState) { + device_image_plain &MainImg = DeviceImage.getMain(); + const bundle_state DevImageState = getSyclObjImpl(MainImg)->get_state(); + // At this time, there is no circumstance where a device image should ever + // be in the source state. That not good. + assert(DevImageState != bundle_state::ext_oneapi_source); + + switch (TargetState) { + case bundle_state::ext_oneapi_source: + // This case added for switch statement completion. We should not be here. + assert(DevImageState == bundle_state::ext_oneapi_source); + break; + case bundle_state::input: + // Do nothing since there is no state which can be upgraded to the input. + assert(DevImageState == bundle_state::input); + break; + case bundle_state::object: + if (DevImageState == bundle_state::input) { + DeviceImage = compile(DeviceImage, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); + break; + } + // Device image is expected to be object state then. + assert(DevImageState == bundle_state::object); + break; + case bundle_state::executable: { + switch (DevImageState) { case bundle_state::ext_oneapi_source: - // This case added for switch statement completion. We should not be here. - assert(DevImageState == bundle_state::ext_oneapi_source); + // This case added for switch statement completion. + // We should not be here. + assert(DevImageState != bundle_state::ext_oneapi_source); break; case bundle_state::input: - // Do nothing since there is no state which can be upgraded to the input. - assert(DevImageState == bundle_state::input); - break; - case bundle_state::object: - if (DevImageState == bundle_state::input) { - ImgWithDeps = - compile(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), - /*PropList=*/{}); - break; - } - // Device image is expected to be object state then. - assert(DevImageState == bundle_state::object); + DeviceImage = build(DeviceImage, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); break; - case bundle_state::executable: { - switch (DevImageState) { - case bundle_state::ext_oneapi_source: - // This case added for switch statement completion. - // We should not be here. - assert(DevImageState != bundle_state::ext_oneapi_source); - break; - case bundle_state::input: - ImgWithDeps = build(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), - /*PropList=*/{}); - break; - case bundle_state::object: { - std::vector LinkedDevImages = - link(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), - /*PropList=*/{}); - // Since only one device image is passed here one output device image is - // expected - assert(LinkedDevImages.size() == 1 && "Expected one linked image here"); - ImgWithDeps = LinkedDevImages[0]; - break; - } - case bundle_state::executable: - ImgWithDeps = build(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), - /*PropList=*/{}); - break; - } + case bundle_state::object: { + std::vector LinkedDevImages = + link(DeviceImage, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); + // Since only one device image is passed here one output device image is + // expected + assert(LinkedDevImages.size() == 1 && "Expected one linked image here"); + DeviceImage = LinkedDevImages[0]; break; } + case bundle_state::executable: + DeviceImage = build(DeviceImage, getSyclObjImpl(MainImg)->get_devices(), + /*PropList=*/{}); + break; } + break; + } } } +void ProgramManager::bringSYCLDeviceImagesToState( + std::vector &DeviceImages, bundle_state TargetState) { + for (DevImgPlainWithDeps &ImgWithDeps : DeviceImages) + bringSYCLDeviceImageToState(ImgWithDeps, TargetState); +} + std::vector ProgramManager::getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, @@ -2713,11 +2732,14 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Adapter); + std::optional RTCInfo = + InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), - InputImpl->get_spec_const_blob_ref()); + InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), + std::move(RTCInfo)); std::string CompileOptions; applyCompileOptionsFromEnvironment(CompileOptions); @@ -2870,11 +2892,24 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, } } + // The origin becomes the combination of all the origins. + uint8_t CombinedOrigins = 0; + for (const device_image_plain &DevImg : ImgWithDeps) + CombinedOrigins |= getSyclObjImpl(DevImg)->getOriginMask(); + + std::vector *> + RTCInfoPtrs; + RTCInfoPtrs.reserve(ImgWithDeps.size()); + for (const device_image_plain &DevImg : ImgWithDeps) + RTCInfoPtrs.emplace_back(&(getSyclObjImpl(DevImg)->getRTCInfo())); + auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); + auto BinImg = getSyclObjImpl(MainImg)->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared( BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), - LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob)); + LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), + CombinedOrigins, std::move(MergedRTCInfo)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -2927,13 +2962,26 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, SpecConstMap = MainInputImpl->get_spec_const_data_ref(); } + // The origin becomes the combination of all the origins. + uint8_t CombinedOrigins = 0; + for (const device_image_plain &DevImg : DevImgWithDeps) + CombinedOrigins |= getSyclObjImpl(DevImg)->getOriginMask(); + + std::vector *> + RTCInfoPtrs; + RTCInfoPtrs.reserve(DevImgWithDeps.size()); + for (const device_image_plain &DevImg : DevImgWithDeps) + RTCInfoPtrs.emplace_back(&(getSyclObjImpl(DevImg)->getRTCInfo())); + auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); + ur_program_handle_t ResProgram = getBuiltURProgram( std::move(BinImgs), Context, Devs, &DevImgWithDeps, SpecConstBlob); DeviceImageImplPtr ExecImpl = std::make_shared( MainInputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, std::move(KernelIDs), ResProgram, - std::move(SpecConstMap), std::move(SpecConstBlob)); + std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, + std::move(MergedRTCInfo)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 96f2ef63003dc..f09e7f35bde7e 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -215,6 +215,9 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr Context); + void addImage(sycl_device_binary RawImg, + RTDeviceBinaryImage **OutImage = nullptr, + std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); void removeImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; @@ -232,7 +235,11 @@ class ProgramManager { const std::string &KernelName); // The function returns the unique SYCL kernel identifier associated with a - // kernel name. + // kernel name or nullopt if there is no such ID. + std::optional tryGetSYCLKernelID(const std::string &KernelName); + + // The function returns the unique SYCL kernel identifier associated with a + // kernel name or throws a sycl exception if there is no such ID. kernel_id getSYCLKernelID(const std::string &KernelName); // The function returns a vector containing all unique SYCL kernel identifiers @@ -282,7 +289,11 @@ class ProgramManager { const context &Ctx, const std::vector &Devs, bundle_state TargetState, const std::vector &KernelIDs = {}); - // Brind images in the passed vector to the required state. Does it inplace + // Bring image to the required state. Does it inplace + void bringSYCLDeviceImageToState(DevImgPlainWithDeps &DeviceImage, + bundle_state TargetState); + + // Bring images in the passed vector to the required state. Does it inplace void bringSYCLDeviceImagesToState(std::vector &DeviceImages, bundle_state TargetState); diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d8ca3fb8c1544..9f6749dafc109 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -392,9 +392,10 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, if (IsKernel) // Kernel only uses assert if it's non interop one - KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.c_str()); + KernelUsesAssert = + (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && + ProgramManager::getInstance().kernelUsesAssert( + Handler.MKernelName.c_str()); finalizeHandler(Handler, Event); PostProcess(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 7094fc2cc2afd..ef46df5f1d32d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1995,24 +1995,12 @@ void instrumentationAddExtraKernelMetadata( std::mutex *KernelMutex = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; - std::shared_ptr SyclKernelImpl; - std::shared_ptr DeviceImageImpl; - - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the - // kernels in interop kernel bundles (if any) do not have kernel_id and - // can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - std::shared_ptr KernelImpl = - detail::getSyclObjImpl(SyclKernel); - - EliminatedArgMask = KernelImpl->getKernelArgMask(); - Program = KernelImpl->getDeviceImage()->get_ur_program_ref(); + if (auto SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + KernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { + EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); + Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else if (nullptr != SyclKernel) { Program = SyclKernel->getProgramRef(); if (!SyclKernel->isCreatedFromSource()) @@ -2527,21 +2515,13 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, ur_kernel_handle_t UrKernel = nullptr; std::shared_ptr DeviceImageImpl = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; + auto &KernelBundleImplPtr = CommandGroup.MKernelBundle; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (auto KernelBundleImplPtr = CommandGroup.MKernelBundle; - KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( - CommandGroup.MKernelName); - - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - - auto SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); + if (auto SyclKernelImpl = + KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel(CommandGroup.MKernelName, + KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); @@ -2694,19 +2674,10 @@ void enqueueImpKernel( std::shared_ptr SyclKernelImpl; std::shared_ptr DeviceImageImpl; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - - SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); - + if ((SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + KernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr})) { Kernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); @@ -3156,7 +3127,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = - !(SyclKernel && SyclKernel->isInterop()) && + (!SyclKernel || SyclKernel->hasSYCLMetadata()) && ProgramManager::getInstance().kernelUsesAssert(KernelName); if (KernelUsesAssert) { EventImpl = MEvent; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 692ffb84120ac..60ea610ebd02e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -417,8 +417,11 @@ event handler::finalize() { getOrInsertHandlerKernelBundle(/*Insert=*/false); if (KernelBundleImpPtr) { // Make sure implicit non-interop kernel bundles have the kernel - if (!KernelBundleImpPtr->isInterop() && - !impl->isStateExplicitKernelBundle()) { + if (!impl->isStateExplicitKernelBundle() && + (KernelBundleImpPtr->empty() || + KernelBundleImpPtr->hasSYCLOfflineImages()) && + !KernelBundleImpPtr->tryGetKernel(MKernelName.c_str(), + KernelBundleImpPtr)) { auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 80c7ddf07c1b2..a290a68542bdb 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -163,11 +163,9 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( EXPECT_TRUE(KernelBundleImplPtr) << "Expect command group to contain kernel bundle"; - auto KernelID = sycl::detail::ProgramManager::getInstance().getSYCLKernelID( - ExecKernel->MKernelName); - sycl::kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - auto SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); + auto SyclKernelImpl = KernelBundleImplPtr->tryGetKernel( + ExecKernel->MKernelName, KernelBundleImplPtr); + EXPECT_TRUE(SyclKernelImpl != nullptr); std::shared_ptr DeviceImageImpl = SyclKernelImpl->getDeviceImage(); ur_program_handle_t Program = DeviceImageImpl->get_ur_program_ref(); From cb591512847f2c39830ca75311a207d01ca7e285 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 10 Mar 2025 23:13:29 -0700 Subject: [PATCH 02/19] Remove unused binary ctor arg Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 9266ec6748e8c..d823ec7023f5f 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -180,7 +180,7 @@ class device_image_impl { std::shared_ptr> &&KernelIDs, syclex::source_language Lang, std::set &&KernelNames, std::unordered_map &&MangledKernelNames, - sycl_device_binary Binary, std::string &&Prefix) + std::string &&Prefix) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(nullptr), MKernelIDs(std::move(KernelIDs)), @@ -646,7 +646,7 @@ class device_image_impl { auto DevImgImpl = std::make_shared( NewImage, MContext, Devices, bundle_state::input, std::move(KernelIDs), MRTCBinInfo->MLanguage, - std::move(KernelNames), std::move(MangledKernelNames), Binary, + std::move(KernelNames), std::move(MangledKernelNames), std::string{Prefix}); DevImgPlainWithDeps ImgWithDeps{DevImgImpl}; From 0cc263e71c6d547ca3879c865ba742a7c7307092 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 10 Mar 2025 23:31:31 -0700 Subject: [PATCH 03/19] Fix interop case Signed-off-by: Larsen, Steffen --- sycl/source/handler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 60ea610ebd02e..ec402387e179e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -418,6 +418,7 @@ event handler::finalize() { if (KernelBundleImpPtr) { // Make sure implicit non-interop kernel bundles have the kernel if (!impl->isStateExplicitKernelBundle() && + !(MKernel && MKernel->isInterop()) && (KernelBundleImpPtr->empty() || KernelBundleImpPtr->hasSYCLOfflineImages()) && !KernelBundleImpPtr->tryGetKernel(MKernelName.c_str(), From 319631e8e8c48328e9d0b6c38f60e6f6998dfbad Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 11 Mar 2025 03:29:02 -0700 Subject: [PATCH 04/19] Fix USM indirect access enablement Signed-off-by: Larsen, Steffen --- sycl/source/detail/kernel_impl.cpp | 29 ++++++++++++++++++++--------- sycl/source/detail/kernel_impl.hpp | 2 ++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index b500f1fbff3ed..5bd7f14c63578 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -34,14 +34,7 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, "Input context must be the same as the context of cl_kernel"); // Enable USM indirect access for interoperability kernels. - // Some UR Adapters (like OpenCL) require this call to enable USM - // For others, UR will turn this into a NOP. - if (Context->getPlatformImpl()->supports_usm()) { - bool EnableAccess = true; - getAdapter()->call( - MKernel, UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), - nullptr, &EnableAccess); - } + enableUSMIndirectAccess(); } kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, @@ -54,7 +47,13 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), - MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} {} + MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { + // Enable USM indirect access for interop and non-sycl-jit source kernels. + // sycl-jit kernels will enable this if needed through the regular kernel + // path. + if (MCreatedFromSource || MIsInterop) + enableUSMIndirectAccess(); +} kernel_impl::~kernel_impl() { try { @@ -119,6 +118,18 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const { "interoperability function or to query a device built-in kernel"); } +void kernel_impl::enableUSMIndirectAccess() const { + if (!MContext->getPlatformImpl()->supports_usm()) + return; + + // Some UR Adapters (like OpenCL) require this call to enable USM + // For others, UR will turn this into a NOP. + bool EnableAccess = true; + getAdapter()->call( + MKernel, UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), + nullptr, &EnableAccess); +} + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES template <> typename info::platform::version::return_type diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index ec1500f7add09..2e3589ac88ee7 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -263,6 +263,8 @@ class kernel_impl { size_t queryMaxNumWorkGroups(queue Queue, const range &WorkGroupSize, size_t DynamicLocalMemorySize) const; + + void enableUSMIndirectAccess() const; }; template From 343ba5387271451b05d1c6098b995d2eee110828 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 11 Mar 2025 05:12:50 -0700 Subject: [PATCH 05/19] Add join test Signed-off-by: Larsen, Steffen --- .../kernel_compiler_sycl_jit_join.cpp | 240 ++++++++++++++++++ 1 file changed, 240 insertions(+) create mode 100644 sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp new file mode 100644 index 0000000000000..50e1066800d13 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp @@ -0,0 +1,240 @@ +//==- kernel_compiler_sycl_jit_join.cpp --- kernel_compiler extension tests ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17255 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out 1 +// RUN: %{l0_leak_check} %{run} %t.out 1 + +#include +#include +#include + +auto constexpr SYCLSource1 = R"""( +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr) { + *Ptr = 42; +} +)"""; + +auto constexpr SYCLSource2 = R"""( +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr) { + *Ptr = 24; +} +)"""; + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +class RegularSYCLKernel; + +constexpr int RegularSYCLKernelWriteValue = 4224; + +void RunRegularSYCLKernel(sycl::queue Q, const exe_kb &Bundle, int *Ptr) { + Q.submit([&](sycl::handler &CGH) { + CGH.use_kernel_bundle(Bundle); + CGH.single_task( + [=]() { *Ptr = RegularSYCLKernelWriteValue; }); + }).wait_and_throw(); +} + +int main() { + + sycl::queue Q; + sycl::context Ctx = Q.get_context(); + + if (!Q.get_device().ext_oneapi_can_compile( + syclex::source_language::sycl_jit)) { + std::cout << "Apparently this device does not support `sycl_jit` source " + "kernel bundle extension: " + << Q.get_device().get_info() + << std::endl; + return -1; + } + + int Failed = 0; + + source_kb KBSrc1 = syclex::create_kernel_bundle_from_source( + Ctx, syclex::source_language::sycl_jit, SYCLSource1); + source_kb KBSrc2 = syclex::create_kernel_bundle_from_source( + Ctx, syclex::source_language::sycl_jit, SYCLSource2); + + // Test joining of source kernel bundles. + { + std::vector KBSrcs{KBSrc1, KBSrc2}; + source_kb KBSrcJoined = sycl::join(KBSrcs); + + exe_kb KBExeJoined = syclex::build(KBSrcJoined); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined source bundles failed: " << *IntPtr + << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined source bundles failed: " << *IntPtr + << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + exe_kb KBExe1 = syclex::build(KBSrc1); + exe_kb KBExe2 = syclex::build(KBSrc2); + + // Test joining of source-based executable kernel bundles. + { + std::vector KBExes{KBExe1, KBExe2}; + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout + << "TestKernel1 in joined source-based executable bundles failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout + << "TestKernel1 in joined source-based executable bundles failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + // Test joining of executable kernel bundles. + { + sycl::kernel_id RegularSYCLKernelID = + sycl::get_kernel_id(); + std::vector RegularSYCLKernelIDs{RegularSYCLKernelID}; + exe_kb RegularKBExe = + sycl::get_kernel_bundle( + Ctx, RegularSYCLKernelIDs); + std::vector KBExes{KBExe1, KBExe2, RegularKBExe}; + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + assert(KBExeJoined.has_kernel(RegularSYCLKernelID, Q.get_device())); + assert(KBExeJoined.has_kernel()); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + sycl::kernel RegularSYCLK = KBExeJoined.get_kernel(RegularSYCLKernelID); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined mixed executable bundles failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined mixed executable bundles failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(RegularSYCLK); + }).wait_and_throw(); + + if (*IntPtr != RegularSYCLKernelWriteValue) { + std::cout << "Regular SYCL kernel (explicit) in joined mixed executable " + "bundles failed: " + << *IntPtr << " != " << RegularSYCLKernelWriteValue << "\n"; + ++Failed; + } + *IntPtr = 0; + + RunRegularSYCLKernel(Q, KBExeJoined, IntPtr); + + if (*IntPtr != RegularSYCLKernelWriteValue) { + std::cout << "Regular SYCL kernel (implicit) in joined mixed executable " + "bundles failed: " + << *IntPtr << " != " << RegularSYCLKernelWriteValue << "\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + return Failed; +} From 8ec71fc5c55750de16fc87029951cc53bce74e86 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 11 Mar 2025 05:21:13 -0700 Subject: [PATCH 06/19] Increment no_sycl_hpp test count Signed-off-by: Larsen, Steffen --- sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index cecce9368f21b..e51d5f86e1777 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 11 +// CHECK-NUM-MATCHES: 12 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From bd332890f89e469a77cdac3779717cb753c41a4f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 11 Mar 2025 05:56:27 -0700 Subject: [PATCH 07/19] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 3e9d81a60910a..b3586b2a8d502 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -567,7 +567,7 @@ class device_image_impl { assert(!std::holds_alternative(MBinImage)); assert(MRTCBinInfo); assert(MOrigins & ImageOriginKernelCompiler); - + std::shared_ptr ContextImpl = getSyclObjImpl(MContext); From 99800a640e3b9cb68f2dd6b26742a50698ecdba8 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 13 Mar 2025 03:51:52 -0700 Subject: [PATCH 08/19] Remove unintended arg Signed-off-by: Larsen, Steffen --- .../test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp index 50e1066800d13..2078f8d7cc478 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_join.cpp @@ -16,8 +16,8 @@ // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17255 // RUN: %{build} -o %t.out -// RUN: %{run} %t.out 1 -// RUN: %{l0_leak_check} %{run} %t.out 1 +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out #include #include From ca9d55537430753895c8f37564e37ba2c5e1eac3 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 13 Mar 2025 23:28:59 -0700 Subject: [PATCH 09/19] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/test-e2e/KernelCompiler/sycl_join.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 1d824e3d17a22..5c72db1cc226e 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -66,8 +66,7 @@ int main() { sycl::queue Q; sycl::context Ctx = Q.get_context(); - if (!Q.get_device().ext_oneapi_can_compile( - syclex::source_language::sycl)) { + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { std::cout << "Apparently this device does not support `sycl_jit` source " "kernel bundle extension: " << Q.get_device().get_info() From 0ed90017732adcd0fd354114eede14b203b489cd Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 17 Mar 2025 06:08:03 -0700 Subject: [PATCH 10/19] Fix issue of adoptive binaries and deallocation Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 31 ++++++++++--- sycl/source/detail/kernel_bundle_impl.hpp | 28 ++++-------- sycl/test-e2e/KernelCompiler/sycl_join.cpp | 53 ++++++++++++++++++++-- 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 7307d33c573aa..c6662088e04d8 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -115,6 +115,22 @@ struct KernelCompilerBinaryInfo { include_pairs_t MIncludePairs; }; +// Helper class to unregister shared SYCL binaries. +class ManagedDeviceBinaries { +public: + ManagedDeviceBinaries(sycl_device_binaries &&Binaries) + : MBinaries{Binaries} {} + ~ManagedDeviceBinaries() try { + ProgramManager::getInstance().removeImages(MBinaries); + syclex::detail::SYCL_JIT_Destroy(MBinaries); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception during unregistration of SYCL binaries", e); + } +private: + sycl_device_binaries MBinaries; +}; + // The class is impl counterpart for sycl::device_image // It can represent a program in different states, kernel_id's it has and state // of specialization constants for it @@ -550,12 +566,12 @@ class device_image_impl { return MRTCBinInfo && MRTCBinInfo->MLanguage == Lang; } - std::vector> - buildFromSource(const std::vector Devices, - const std::vector &BuildOptions, - std::string *LogPtr, - const std::vector &RegisteredKernelNames, - std::vector &OutDeviceBinaries) const { + std::vector> buildFromSource( + const std::vector Devices, + const std::vector &BuildOptions, std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBinaries) + const { assert(!std::holds_alternative(MBinImage)); assert(MRTCBinInfo); assert(MOrigins & ImageOriginKernelCompiler); @@ -666,7 +682,8 @@ class device_image_impl { Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); } - OutDeviceBinaries.push_back(std::move(Binaries)); + OutDeviceBinaries.emplace_back( + std::make_shared(std::move(Binaries))); return Result; } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index d2919cba99107..d9b96becdbe7a 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -302,9 +302,11 @@ class kernel_bundle_impl { } for (const detail::KernelBundleImplPtr &Bundle : Bundles) { - MDeviceImages.insert(MDeviceImages.end(), Bundle->MDeviceImages.begin(), Bundle->MDeviceImages.end()); + MDeviceBinaries.insert(MDeviceBinaries.end(), + Bundle->MDeviceBinaries.begin(), + Bundle->MDeviceBinaries.end()); } fillUniqueDeviceImages(); @@ -368,9 +370,10 @@ class kernel_bundle_impl { // oneapi_ext_kernel_compiler // construct from built source files - kernel_bundle_impl(const context &Context, const std::vector &Devs, - std::vector &&DevImgs, - std::vector &&DevBinaries) + kernel_bundle_impl( + const context &Context, const std::vector &Devs, + std::vector &&DevImgs, + std::vector> &&DevBinaries) : MContext(Context), MDevices(Devs), MUniqueDeviceImages(std::move(DevImgs)), MState(bundle_state::executable), @@ -393,7 +396,7 @@ class kernel_bundle_impl { assert(allSourceBasedImages() && "All images must be source-based."); std::vector NewDevImgs; - std::vector NewDeviceBinaries; + std::vector> NewDeviceBinaries; for (device_image_plain &DevImg : MUniqueDeviceImages) { std::vector> NewDevImgImpls = getSyclObjImpl(DevImg)->buildFromSource(Devices, BuildOptions, LogPtr, @@ -626,17 +629,6 @@ class kernel_bundle_impl { return true; } - ~kernel_bundle_impl() { - try { - for (sycl_device_binaries &Binaries : MDeviceBinaries) { - ProgramManager::getInstance().removeImages(Binaries); - syclex::detail::SYCL_JIT_Destroy(Binaries); - } - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_bundle_impl", e); - } - } - bool hasSourceBasedImages() const noexcept { return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { return getSyclObjImpl(DevImg)->getOriginMask() & @@ -771,8 +763,8 @@ class kernel_bundle_impl { bundle_state MState; // For sycl_jit, building from source may have produced sycl binaries that - // this kernel_bundle now manages. - std::vector MDeviceBinaries; + // the kernel_bundles now manage. + std::vector> MDeviceBinaries; }; } // namespace detail diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index 5c72db1cc226e..d579d9d1adc60 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -121,12 +121,12 @@ int main() { sycl::free(IntPtr, Q); } - exe_kb KBExe1 = syclex::build(KBSrc1); - exe_kb KBExe2 = syclex::build(KBSrc2); + auto KBExe1 = std::make_shared(syclex::build(KBSrc1)); + auto KBExe2 = std::make_shared(syclex::build(KBSrc2)); // Test joining of source-based executable kernel bundles. { - std::vector KBExes{KBExe1, KBExe2}; + std::vector KBExes{*KBExe1, *KBExe2}; exe_kb KBExeJoined = sycl::join(KBExes); assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); @@ -173,7 +173,7 @@ int main() { exe_kb RegularKBExe = sycl::get_kernel_bundle( Ctx, RegularSYCLKernelIDs); - std::vector KBExes{KBExe1, KBExe2, RegularKBExe}; + std::vector KBExes{*KBExe1, *KBExe2, RegularKBExe}; exe_kb KBExeJoined = sycl::join(KBExes); assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); @@ -235,5 +235,50 @@ int main() { sycl::free(IntPtr, Q); } + // Test joining of executable kernel bundles with the original bundles dying + // before the parent. + { + std::vector KBExes{*KBExe1, *KBExe2}; + + KBExe1.reset(); + KBExe2.reset(); + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined source-based executable bundles with " + "dead parents failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined source-based executable bundles with " + "dead parents failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + return Failed; } From 8b5e13344b6e0722ff94bf22d9c6913d1511a3da Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 17 Mar 2025 07:06:46 -0700 Subject: [PATCH 11/19] Split ctors Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index c6662088e04d8..33fa6198daea4 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -56,11 +56,15 @@ constexpr uint8_t ImageOriginKernelCompiler = 1 << 2; // Information unique to images compiled at runtime through the // ext_oneapi_kernel_compiler extension. struct KernelCompilerBinaryInfo { + KernelCompilerBinaryInfo(syclex::source_language Lang) : MLanguage{Lang} {} + KernelCompilerBinaryInfo(syclex::source_language Lang, - std::set &&KernelNames = {}, - include_pairs_t &&IncludePairsVec = {}) - : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, - MIncludePairs{std::move(IncludePairsVec)} {} + include_pairs_t &&IncludePairsVec) + : MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {} + + KernelCompilerBinaryInfo(syclex::source_language Lang, + std::set &&KernelNames) + : MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {} KernelCompilerBinaryInfo( syclex::source_language Lang, std::set &&KernelNames, @@ -218,8 +222,8 @@ class device_image_impl { MKernelIDs(std::make_shared>()), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), - MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::set{}, - std::move(IncludePairsVec)}) { + MRTCBinInfo( + KernelCompilerBinaryInfo{Lang, std::move(IncludePairsVec)}) { updateSpecConstSymMap(); } From 66f82b10c0ad92cdcab2869a79f125f69480e2f8 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 17 Mar 2025 09:34:17 -0700 Subject: [PATCH 12/19] Remove old sycl_jit ref Signed-off-by: Larsen, Steffen --- sycl/test-e2e/KernelCompiler/sycl_join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp index d579d9d1adc60..2b4dc8f8e1bec 100644 --- a/sycl/test-e2e/KernelCompiler/sycl_join.cpp +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -67,7 +67,7 @@ int main() { sycl::context Ctx = Q.get_context(); if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { - std::cout << "Apparently this device does not support `sycl_jit` source " + std::cout << "Apparently this device does not support `sycl` source " "kernel bundle extension: " << Q.get_device().get_info() << std::endl; From e35a2c045df38adfc9b3328ba9767fa3f0ab7609 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 25 Mar 2025 02:32:44 -0700 Subject: [PATCH 13/19] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 6 ++---- sycl/source/detail/kernel_bundle_impl.hpp | 4 ++-- sycl/source/detail/program_manager/program_manager.cpp | 3 +-- sycl/source/detail/program_manager/program_manager.hpp | 5 ++--- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 75e45caa27a7f..5b897734fbe03 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -121,12 +121,10 @@ class ManagedDeviceGlobalsRegistry { // Helper class to unregister shared SYCL binaries. class ManagedDeviceBinaries { public: - ManagedDeviceBinaries( - sycl_device_binaries &&Binaries) + ManagedDeviceBinaries(sycl_device_binaries &&Binaries) : MBinaries{Binaries} {} - ManagedDeviceBinaries(const ManagedDeviceBinaries &) = - delete; + ManagedDeviceBinaries(const ManagedDeviceBinaries &) = delete; ~ManagedDeviceBinaries() try { ProgramManager::getInstance().removeImages(MBinaries); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 7f937a6326815..3cac4e5b2001f 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -306,8 +306,8 @@ class kernel_bundle_impl { MDeviceImages.insert(MDeviceImages.end(), Bundle->MDeviceImages.begin(), Bundle->MDeviceImages.end()); MSharedDeviceBinaries.insert(MSharedDeviceBinaries.end(), - Bundle->MSharedDeviceBinaries.begin(), - Bundle->MSharedDeviceBinaries.end()); + Bundle->MSharedDeviceBinaries.begin(), + Bundle->MSharedDeviceBinaries.end()); } fillUniqueDeviceImages(); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ff3e2cb30fce3..da4bcfa2bf5e5 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1904,8 +1904,7 @@ static bool isCompiledAtRuntime(sycl_device_binaries DeviceBinary) { return false; } -void ProgramManager::addImage(sycl_device_binary RawImg, - bool IsRTC, +void ProgramManager::addImage(sycl_device_binary RawImg, bool IsRTC, RTDeviceBinaryImage **OutImage, std::vector *OutKernelIDs) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 364413c702021..a149ceff43928 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -215,8 +215,7 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr &Context); - void addImage(sycl_device_binary RawImg, - bool IsRTC, + void addImage(sycl_device_binary RawImg, bool IsRTC, RTDeviceBinaryImage **OutImage = nullptr, std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); @@ -268,7 +267,7 @@ class ProgramManager { // returned. DeviceGlobalMapEntry * tryGetDeviceGlobalEntry(const std::string &UniqueId, - bool ExcludeDeviceImageScopeDecorated = false); + bool ExcludeDeviceImageScopeDecorated = false); // The function gets multiple device_global entries identified by their unique // IDs from the device_global map. From 3f4137bc50b01ed2fa8fc790eebd47188a24021c Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 25 Mar 2025 02:46:21 -0700 Subject: [PATCH 14/19] Make MSVC happy about try-catch dtor Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 5b897734fbe03..28843b7909fb2 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -66,11 +66,13 @@ class ManagedDeviceGlobalsRegistry { ManagedDeviceGlobalsRegistry(const ManagedDeviceGlobalsRegistry &) = delete; - ~ManagedDeviceGlobalsRegistry() try { - unregisterDeviceGlobalsFromContext(); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM( - "exception during unregistration of SYCL binaries", e); + ~ManagedDeviceGlobalsRegistry() { + try { + unregisterDeviceGlobalsFromContext(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception during unregistration of SYCL binaries", e); + } } bool hasDeviceGlobalName(const std::string &Name) const noexcept { From 17ca3a93c4c756247745301ea7071119ad5f0611 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 25 Mar 2025 03:05:13 -0700 Subject: [PATCH 15/19] Another attempt at appeasing MSVC Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 28843b7909fb2..02e434b4d937a 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -128,12 +128,14 @@ class ManagedDeviceBinaries { ManagedDeviceBinaries(const ManagedDeviceBinaries &) = delete; - ~ManagedDeviceBinaries() try { - ProgramManager::getInstance().removeImages(MBinaries); - syclex::detail::SYCL_JIT_Destroy(MBinaries); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM( - "exception during unregistration of SYCL binaries", e); + ~ManagedDeviceBinaries() { + try { + ProgramManager::getInstance().removeImages(MBinaries); + syclex::detail::SYCL_JIT_Destroy(MBinaries); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception during unregistration of SYCL binaries", e); + } } private: From e91f6439e2397b301a87cd961a0280ff69bf5a6b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 25 Mar 2025 04:45:45 -0700 Subject: [PATCH 16/19] Disallow conflicting include headers Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 02e434b4d937a..ae9367bb3ab9f 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -169,6 +169,7 @@ struct KernelCompilerBinaryInfo { Merge(const std::vector *> &RTCInfos) { std::optional Result = std::nullopt; + std::map IncludePairMap; for (const std::optional *RTCInfoPtr : RTCInfos) { if (!RTCInfoPtr || !(*RTCInfoPtr)) continue; @@ -196,6 +197,20 @@ struct KernelCompilerBinaryInfo { Result->MMangledKernelNames.insert(RTCInfo->MMangledKernelNames.begin(), RTCInfo->MMangledKernelNames.end()); + // Assumption is that there are no duplicates, but in the case we let + // duplicates through it should be alright to pay for the minimal extra + // space allocated. + Result->MIncludePairs.reserve(RTCInfo->MIncludePairs.size()); + for (const auto &IncludePair : RTCInfo->MIncludePairs) { + auto Inserted = IncludePairMap.insert(IncludePair); + if (!Inserted.second) { + if (Inserted.first->second != IncludePair.second) + throw sycl::exception(make_error_code(errc::invalid), + "Conflicting include files."); + } else { + Result->MIncludePairs.push_back(IncludePair); + } + } Result->MIncludePairs.insert(Result->MIncludePairs.end(), RTCInfo->MIncludePairs.begin(), RTCInfo->MIncludePairs.end()); From 1e7f5e6398f52d1691b856a4ca4637e3b551c3e2 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 25 Mar 2025 05:45:26 -0700 Subject: [PATCH 17/19] Remove unintentional include pair adding Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index ae9367bb3ab9f..01b0bd172eb62 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -211,9 +211,6 @@ struct KernelCompilerBinaryInfo { Result->MIncludePairs.push_back(IncludePair); } } - Result->MIncludePairs.insert(Result->MIncludePairs.end(), - RTCInfo->MIncludePairs.begin(), - RTCInfo->MIncludePairs.end()); Result->MDeviceGlobalRegistries.insert( Result->MDeviceGlobalRegistries.end(), From ffbe77754e8a2468efbe0ddff5d42947de4ddfe5 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Apr 2025 04:20:09 -0700 Subject: [PATCH 18/19] Fix dependency resolution Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 44 +++++++++++++++---- .../program_manager/program_manager.cpp | 42 ++++++++++-------- .../program_manager/program_manager.hpp | 17 ++++--- 3 files changed, 71 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 470b1bae68bbf..1995dc0f8cd6e 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -746,19 +746,28 @@ class device_image_impl { MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr); auto &PM = detail::ProgramManager::getInstance(); - std::vector> Result; - Result.reserve(Binaries->NumDeviceBinaries); + + // Add all binaries and keep the images for processing. + std::vector>>> + NewImages; + NewImages.reserve(Binaries->NumDeviceBinaries); for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); - RTDeviceBinaryImage *NewImage = nullptr; auto KernelIDs = std::make_shared>(); PM.addImage(Binary, &NewImage, KernelIDs.get()); + if (NewImage) + NewImages.push_back( + std::make_pair(std::move(NewImage), std::move(KernelIDs))); + } - // If the image is empty, we can skip it. - if (!NewImage) - continue; - + // Now bring all images into the proper state. Note that we do this in a + // separate pass over NewImages to make sure dependency images have been + // registered beforehand. + std::vector> Result; + Result.reserve(NewImages.size()); + for (auto &[NewImage, KernelIDs] : NewImages) { std::set KernelNames; std::unordered_map MangledKernelNames; std::unordered_set DeviceGlobalIDSet; @@ -843,7 +852,26 @@ class device_image_impl { std::move(KernelNames), std::move(MangledKernelNames), std::string{Prefix}, std::move(DGRegs)); - DevImgPlainWithDeps ImgWithDeps{DevImgImpl}; + // Resolve dependencies. + // TODO: Consider making a collectDeviceImageDeps variant that takes a + // set reference and inserts into that instead. + std::set ImgDeps; + for (const device &Device : Devices) { + std::set DevImgDeps = + PM.collectDeviceImageDeps(*NewImage, Device); + ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); + } + + // Pack main image and dependencies together. + std::vector NewImageAndDeps; + NewImageAndDeps.reserve(1 + ImgDeps.size()); + NewImageAndDeps.push_back(std::move( + createSyclObjFromImpl(std::move(DevImgImpl)))); + for (RTDeviceBinaryImage *ImgDep : ImgDeps) + NewImageAndDeps.push_back(PM.createDependencyImage( + MContext, Devices, ImgDep, bundle_state::input)); + + DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); PM.bringSYCLDeviceImageToState(ImgWithDeps, bundle_state::executable); Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 00bcb80086959..a3331019a97b2 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2698,32 +2698,36 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( Images.reserve(Deps.size() + 1); Images.push_back( createSyclObjFromImpl(std::move(MainImpl))); - for (RTDeviceBinaryImage *Dep : Deps) { - std::shared_ptr> DepKernelIDs; - { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - // For device library images, they are not in m_BinImg2KernelIDs since - // no kernel is included. - auto DepIt = m_BinImg2KernelIDs.find(Dep); - if (DepIt != m_BinImg2KernelIDs.end()) - DepKernelIDs = DepIt->second; - } - - assert(ImgInfoPair.second.State == getBinImageState(Dep) && - "State mismatch between main image and its dependency"); - DeviceImageImplPtr DepImpl = std::make_shared( - Dep, Ctx, Devs, ImgInfoPair.second.State, DepKernelIDs, - /*PIProgram=*/nullptr); - + for (RTDeviceBinaryImage *Dep : Deps) Images.push_back( - createSyclObjFromImpl(std::move(DepImpl))); - } + createDependencyImage(Ctx, Devs, Dep, ImgInfoPair.second.State)); SYCLDeviceImages.push_back(std::move(Images)); } return SYCLDeviceImages; } +device_image_plain ProgramManager::createDependencyImage( + const context &Ctx, const std::vector &Devs, + RTDeviceBinaryImage *DepImage, bundle_state DepState) { + std::shared_ptr> DepKernelIDs; + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // For device library images, they are not in m_BinImg2KernelIDs since + // no kernel is included. + auto DepIt = m_BinImg2KernelIDs.find(DepImage); + if (DepIt != m_BinImg2KernelIDs.end()) + DepKernelIDs = DepIt->second; + } + + assert(DepState == getBinImageState(DepImage) && + "State mismatch between main image and its dependency"); + DeviceImageImplPtr DepImpl = std::make_shared( + DepImage, Ctx, Devs, DepState, DepKernelIDs, /*PIProgram=*/nullptr); + + return createSyclObjFromImpl(std::move(DepImpl)); +} + void ProgramManager::bringSYCLDeviceImageToState( DevImgPlainWithDeps &DeviceImage, bundle_state TargetState) { device_image_plain &MainImg = DeviceImage.getMain(); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index a29604e6370d8..f683a1c675935 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -297,6 +297,12 @@ class ProgramManager { const context &Ctx, const std::vector &Devs, bundle_state TargetState, const std::vector &KernelIDs = {}); + // Creates a new dependency image for a given dependency binary image. + device_image_plain createDependencyImage(const context &Ctx, + const std::vector &Devs, + RTDeviceBinaryImage *DepImage, + bundle_state DepState); + // Bring image to the required state. Does it inplace void bringSYCLDeviceImageToState(DevImgPlainWithDeps &DeviceImage, bundle_state TargetState); @@ -363,6 +369,12 @@ class ProgramManager { std::set getRawDeviceImages(const std::vector &KernelIDs); + std::set + collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); + std::set + collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, + const device &Dev); + private: ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; @@ -386,11 +398,6 @@ class ProgramManager { /// Add info on kernels using local arg into cache void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img); - std::set - collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); - std::set - collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, - const device &Dev); std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device &Dev); From a6acfa23cc96f22177b405f071eaa195bfa1a7a0 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Apr 2025 05:06:14 -0700 Subject: [PATCH 19/19] Remove unnecessary move Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_image_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 1995dc0f8cd6e..4b4d454475eb7 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -865,8 +865,8 @@ class device_image_impl { // Pack main image and dependencies together. std::vector NewImageAndDeps; NewImageAndDeps.reserve(1 + ImgDeps.size()); - NewImageAndDeps.push_back(std::move( - createSyclObjFromImpl(std::move(DevImgImpl)))); + NewImageAndDeps.push_back( + createSyclObjFromImpl(std::move(DevImgImpl))); for (RTDeviceBinaryImage *ImgDep : ImgDeps) NewImageAndDeps.push_back(PM.createDependencyImage( MContext, Devices, ImgDep, bundle_state::input));