diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 1e57ebf9888a1..3637db5e56d19 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -299,7 +299,8 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, // symbols (e.g. when kernel_bundle is supposed to be joined with another). auto KernelIDs = std::make_shared>(); auto DevImgImpl = std::make_shared( - nullptr, TargetContext, Devices, State, KernelIDs, UrProgram); + nullptr, TargetContext, Devices, State, KernelIDs, UrProgram, + ImageOriginInterop); device_image_plain DevImg{DevImgImpl}; return std::make_shared(TargetContext, Devices, DevImg); diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 41af7b1840b32..4b4d454475eb7 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -12,8 +12,12 @@ #include #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -38,6 +42,193 @@ template struct LessByHash { } }; +namespace syclex = sycl::ext::oneapi::experimental; + +using include_pairs_t = + std::vector>; + +// Bits representing the origin of a given image, i.e. regular offline SYCL +// compilation, interop, kernel_compiler online compilation, etc. +constexpr uint8_t ImageOriginSYCLOffline = 1; +constexpr uint8_t ImageOriginInterop = 1 << 1; +constexpr uint8_t ImageOriginKernelCompiler = 1 << 2; + +// Helper class to track and unregister shared SYCL device_globals. +class ManagedDeviceGlobalsRegistry { +public: + ManagedDeviceGlobalsRegistry( + const std::shared_ptr &ContextImpl, + const std::string &Prefix, std::vector &&DeviceGlobalNames, + std::vector> &&DeviceGlobalAllocations) + : MContextImpl{ContextImpl}, MPrefix{Prefix}, + MDeviceGlobalNames{std::move(DeviceGlobalNames)}, + MDeviceGlobalAllocations{std::move(DeviceGlobalAllocations)} {} + + ManagedDeviceGlobalsRegistry(const ManagedDeviceGlobalsRegistry &) = delete; + + ~ManagedDeviceGlobalsRegistry() { + try { + unregisterDeviceGlobalsFromContext(); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception during unregistration of SYCL binaries", e); + } + } + + bool hasDeviceGlobalName(const std::string &Name) const noexcept { + return !MDeviceGlobalNames.empty() && + std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), + mangleDeviceGlobalName(Name)) != MDeviceGlobalNames.end(); + } + + DeviceGlobalMapEntry *tryGetDeviceGlobalEntry(const std::string &Name) const { + auto &PM = detail::ProgramManager::getInstance(); + return PM.tryGetDeviceGlobalEntry(MPrefix + mangleDeviceGlobalName(Name)); + } + +private: + static std::string mangleDeviceGlobalName(const std::string &Name) { + // TODO: Support device globals declared in namespaces. + return "_Z" + std::to_string(Name.length()) + Name; + } + + void unregisterDeviceGlobalsFromContext() { + if (MDeviceGlobalNames.empty()) + return; + + // Manually trigger the release of resources for all device global map + // entries associated with this runtime-compiled bundle. Normally, this + // would happen in `~context_impl()`, however in the RTC setting, the + // context outlives the DG map entries owned by the program manager. + + std::vector DeviceGlobalIDs; + std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), + std::back_inserter(DeviceGlobalIDs), + [&](const std::string &DGName) { return MPrefix + DGName; }); + for (DeviceGlobalMapEntry *Entry : + ProgramManager::getInstance().getDeviceGlobalEntries( + DeviceGlobalIDs)) { + Entry->removeAssociatedResources(MContextImpl.get()); + MContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr); + } + } + + std::shared_ptr MContextImpl; + + std::string MPrefix; + std::vector MDeviceGlobalNames; + std::vector> MDeviceGlobalAllocations; +}; + +// Helper class to unregister shared SYCL binaries. +class ManagedDeviceBinaries { +public: + ManagedDeviceBinaries(sycl_device_binaries &&Binaries) + : MBinaries{Binaries} {} + + ManagedDeviceBinaries(const ManagedDeviceBinaries &) = delete; + + ~ManagedDeviceBinaries() { + try { + ProgramManager::getInstance().removeImages(MBinaries); + syclex::detail::SYCL_JIT_Destroy(MBinaries); + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception during unregistration of SYCL binaries", e); + } + } + +private: + sycl_device_binaries MBinaries; +}; + +// Information unique to images compiled at runtime through the +// ext_oneapi_kernel_compiler extension. +struct KernelCompilerBinaryInfo { + KernelCompilerBinaryInfo(syclex::source_language Lang) : MLanguage{Lang} {} + + KernelCompilerBinaryInfo(syclex::source_language Lang, + include_pairs_t &&IncludePairsVec) + : MLanguage{Lang}, MIncludePairs{std::move(IncludePairsVec)} {} + + KernelCompilerBinaryInfo(syclex::source_language Lang, + std::set &&KernelNames) + : MLanguage{Lang}, MKernelNames{std::move(KernelNames)} {} + + KernelCompilerBinaryInfo( + syclex::source_language Lang, std::set &&KernelNames, + std::unordered_map &&MangledKernelNames, + std::string &&Prefix, + std::shared_ptr &&DeviceGlobalRegistry) + : MLanguage{Lang}, MKernelNames{std::move(KernelNames)}, + MMangledKernelNames{std::move(MangledKernelNames)}, + MPrefix{std::move(Prefix)}, + MDeviceGlobalRegistries{std::move(DeviceGlobalRegistry)} {} + + static std::optional + Merge(const std::vector *> + &RTCInfos) { + std::optional Result = std::nullopt; + std::map IncludePairMap; + for (const std::optional *RTCInfoPtr : RTCInfos) { + if (!RTCInfoPtr || !(*RTCInfoPtr)) + continue; + const std::optional &RTCInfo = *RTCInfoPtr; + + if (!Result) { + Result = RTCInfo; + continue; + } + + if (RTCInfo->MLanguage != Result->MLanguage) + throw sycl::exception(make_error_code(errc::invalid), + "Linking binaries with different source " + "languages is not currently supported."); + + if (!RTCInfo->MPrefix.empty() && !Result->MPrefix.empty() && + RTCInfo->MPrefix != Result->MPrefix) + throw sycl::exception(make_error_code(errc::invalid), + "Linking binaries with different kernel prefixes " + "is not currently supported."); + + for (const std::string &KernelName : RTCInfo->MKernelNames) + Result->MKernelNames.insert(KernelName); + + Result->MMangledKernelNames.insert(RTCInfo->MMangledKernelNames.begin(), + RTCInfo->MMangledKernelNames.end()); + + // Assumption is that there are no duplicates, but in the case we let + // duplicates through it should be alright to pay for the minimal extra + // space allocated. + Result->MIncludePairs.reserve(RTCInfo->MIncludePairs.size()); + for (const auto &IncludePair : RTCInfo->MIncludePairs) { + auto Inserted = IncludePairMap.insert(IncludePair); + if (!Inserted.second) { + if (Inserted.first->second != IncludePair.second) + throw sycl::exception(make_error_code(errc::invalid), + "Conflicting include files."); + } else { + Result->MIncludePairs.push_back(IncludePair); + } + } + + Result->MDeviceGlobalRegistries.insert( + Result->MDeviceGlobalRegistries.end(), + RTCInfo->MDeviceGlobalRegistries.begin(), + RTCInfo->MDeviceGlobalRegistries.end()); + } + return Result; + } + + syclex::source_language MLanguage; + std::set MKernelNames; + std::unordered_map MMangledKernelNames; + std::string MPrefix; + include_pairs_t MIncludePairs; + std::vector> + MDeviceGlobalRegistries; +}; + // The class is impl counterpart for sycl::device_image // It can represent a program in different states, kernel_id's it has and state // of specialization constants for it @@ -60,25 +251,101 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, std::shared_ptr> KernelIDs, - ur_program_handle_t Program) + ur_program_handle_t Program, + uint8_t Origins = ImageOriginSYCLOffline) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), - MSpecConstsDefValBlob(getSpecConstsDefValBlob()) { + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(Origins) { updateSpecConstSymMap(); } - device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, - std::vector Devices, bundle_state State, + device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, std::shared_ptr> KernelIDs, ur_program_handle_t Program, const SpecConstMapT &SpecConstMap, - const std::vector &SpecConstsBlob) + const std::vector &SpecConstsBlob, + uint8_t Origins, + std::optional &&RTCInfo) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), - MSpecConstSymMap(SpecConstMap) {} + MSpecConstSymMap(SpecConstMap), MOrigins(Origins), + MRTCBinInfo(std::move(RTCInfo)) {} + + device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, + ur_program_handle_t Program, syclex::source_language Lang, + std::set &&KernelNames) + : MBinImage(BinImage), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(State), MProgram(Program), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) { + updateSpecConstSymMap(); + } + + device_image_impl( + const RTDeviceBinaryImage *BinImage, const context &Context, + const std::vector &Devices, bundle_state State, + std::shared_ptr> &&KernelIDs, + syclex::source_language Lang, std::set &&KernelNames, + std::unordered_map &&MangledKernelNames, + std::string &&Prefix, + std::shared_ptr &&DeviceGlobalRegistry) + : MBinImage(BinImage), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(State), MProgram(nullptr), + MKernelIDs(std::move(KernelIDs)), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{ + Lang, std::move(KernelNames), std::move(MangledKernelNames), + std::move(Prefix), std::move(DeviceGlobalRegistry)}) { + updateSpecConstSymMap(); + } + + device_image_impl(const std::string &Src, context Context, + const std::vector &Devices, + syclex::source_language Lang, + include_pairs_t &&IncludePairsVec) + : MBinImage(Src), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), + MProgram(nullptr), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo( + KernelCompilerBinaryInfo{Lang, std::move(IncludePairsVec)}) { + updateSpecConstSymMap(); + } + + device_image_impl(const std::vector &Bytes, const context &Context, + const std::vector &Devices, + syclex::source_language Lang) + : MBinImage(Bytes), MContext(std::move(Context)), + MDevices(std::move(Devices)), MState(bundle_state::ext_oneapi_source), + MProgram(nullptr), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) { + updateSpecConstSymMap(); + } + + device_image_impl(const context &Context, const std::vector &Devices, + bundle_state State, ur_program_handle_t Program, + syclex::source_language Lang, + std::set &&KernelNames) + : MBinImage(static_cast(nullptr)), + MContext(std::move(Context)), MDevices(std::move(Devices)), + MState(State), MProgram(Program), + MKernelIDs(std::make_shared>()), + MSpecConstsDefValBlob(getSpecConstsDefValBlob()), + MOrigins(ImageOriginKernelCompiler), + MRTCBinInfo(KernelCompilerBinaryInfo{Lang, std::move(KernelNames)}) {} bool has_kernel(const kernel_id &KernelIDCand) const noexcept { return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(), @@ -127,7 +394,7 @@ class device_image_impl { // function is make_kernel(), but I'm not sure if it's even possible to // use spec constant with such kernel. So, in such case we need to check // if it's JIT or no somehow. - assert(MBinImage && + assert(hasRTDeviceBinaryImage() && "native_specialization_constant() called for unimplemented case"); auto IsJITSPIRVTarget = [](const char *Target) { @@ -136,7 +403,7 @@ class device_image_impl { }; return (MContext.get_backend() == backend::opencl || MContext.get_backend() == backend::ext_oneapi_level_zero) && - IsJITSPIRVTarget(MBinImage->getRawData().DeviceTargetSpec); + IsJITSPIRVTarget(get_bin_image_ref()->getRawData().DeviceTargetSpec); } bool has_specialization_constant(const char *SpecName) const noexcept { @@ -231,7 +498,7 @@ class device_image_impl { bool specialization_constants_replaced_with_default() const noexcept { sycl_device_binary_property Prop = - MBinImage->getProperty("specConstsReplacedWithDefault"); + get_bin_image_ref()->getProperty("specConstsReplacedWithDefault"); return Prop && (DeviceBinaryProperty(Prop).asUint32() != 0); } @@ -251,7 +518,9 @@ class device_image_impl { return MProgram; } - const RTDeviceBinaryImage *&get_bin_image_ref() noexcept { return MBinImage; } + const RTDeviceBinaryImage *const &get_bin_image_ref() const noexcept { + return std::get(MBinImage); + } const context &get_context() const noexcept { return MContext; } @@ -325,16 +594,492 @@ class device_image_impl { } } + std::string adjustKernelName(const std::string &Name) const { + if (!MRTCBinInfo.has_value()) + return Name; + + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + auto It = MRTCBinInfo->MMangledKernelNames.find(Name); + if (It != MRTCBinInfo->MMangledKernelNames.end()) + return It->second; + } + + return Name; + } + + bool hasKernelName(const std::string &Name) const { + return MRTCBinInfo.has_value() && !Name.empty() && + MRTCBinInfo->MKernelNames.find(adjustKernelName(Name)) != + MRTCBinInfo->MKernelNames.end(); + } + + std::shared_ptr tryGetSourceBasedKernel( + const std::string &Name, const context &Context, + const std::shared_ptr &OwnerBundle, + const std::shared_ptr &Self) const { + if (!(getOriginMask() & ImageOriginKernelCompiler)) + return nullptr; + + assert(MRTCBinInfo); + std::string AdjustedName = adjustKernelName(Name); + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + auto &PM = ProgramManager::getInstance(); + auto KID = PM.tryGetSYCLKernelID(MRTCBinInfo->MPrefix + AdjustedName); + + if (!KID || !has_kernel(*KID)) + return nullptr; + + auto UrProgram = get_ur_program_ref(); + auto [UrKernel, CacheMutex, ArgMask] = + PM.getOrCreateKernel(Context, AdjustedName, + /*PropList=*/{}, UrProgram); + return std::make_shared(UrKernel, getSyclObjImpl(Context), + Self, OwnerBundle, ArgMask, + UrProgram, CacheMutex); + } + + ur_program_handle_t UrProgram = get_ur_program_ref(); + const AdapterPtr &Adapter = getSyclObjImpl(Context)->getAdapter(); + ur_kernel_handle_t UrKernel = nullptr; + Adapter->call(UrProgram, AdjustedName.c_str(), + &UrKernel); + // Kernel created by urKernelCreate is implicitly retained. + + return std::make_shared( + UrKernel, detail::getSyclObjImpl(Context), Self, OwnerBundle, + /*ArgMask=*/nullptr, UrProgram, /*CacheMutex=*/nullptr); + } + + bool hasDeviceGlobalName(const std::string &Name) const noexcept { + if (!MRTCBinInfo.has_value()) + return false; + + return std::any_of(MRTCBinInfo->MDeviceGlobalRegistries.begin(), + MRTCBinInfo->MDeviceGlobalRegistries.end(), + [&Name](const auto &DGReg) { + return DGReg->hasDeviceGlobalName(Name); + }); + } + + DeviceGlobalMapEntry *tryGetDeviceGlobalEntry(const std::string &Name) const { + if (!MRTCBinInfo.has_value()) + return nullptr; + + for (const auto &DGReg : MRTCBinInfo->MDeviceGlobalRegistries) + if (DeviceGlobalMapEntry *DGEntry = DGReg->tryGetDeviceGlobalEntry(Name)) + return DGEntry; + return nullptr; + } + + uint8_t getOriginMask() const noexcept { return MOrigins; } + + const std::optional &getRTCInfo() const noexcept { + return MRTCBinInfo; + } + + bool isNonSYCLSourceBased() const noexcept { + return (getOriginMask() & ImageOriginKernelCompiler) && + !isFromSourceLanguage(syclex::source_language::sycl); + } + + bool isFromSourceLanguage(syclex::source_language Lang) const noexcept { + return MRTCBinInfo && MRTCBinInfo->MLanguage == Lang; + } + + std::vector> buildFromSource( + const std::vector Devices, + const std::vector &BuildOptions, std::string *LogPtr, + const std::vector &RegisteredKernelNames, + std::vector> &OutDeviceBins) + const { + assert(!std::holds_alternative(MBinImage)); + assert(MRTCBinInfo); + assert(MOrigins & ImageOriginKernelCompiler); + + const std::shared_ptr &ContextImpl = + getSyclObjImpl(MContext); + + for (const auto &SyclDev : Devices) { + const DeviceImplPtr &DevImpl = getSyclObjImpl(SyclDev); + if (!ContextImpl->hasDevice(DevImpl)) { + throw sycl::exception(make_error_code(errc::invalid), + "device not part of kernel_bundle context"); + } + if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + // This error cannot not be exercised in the current implementation, as + // compatibility with a source language depends on the backend's + // capabilities and all devices in one context share the same backend in + // the current implementation, so this would lead to an error already + // during construction of the source bundle. + throw sycl::exception(make_error_code(errc::invalid), + "device does not support source language"); + } + } + + if (MRTCBinInfo->MLanguage == syclex::source_language::sycl) { + assert(std::holds_alternative(MBinImage)); + + // Build device images via the program manager. + const std::string &SourceStr = std::get(MBinImage); + std::ostringstream SourceExt; + if (!RegisteredKernelNames.empty()) { + SourceExt << SourceStr << '\n'; + + auto EmitEntry = + [&SourceExt](const std::string &Name) -> std::ostringstream & { + SourceExt << " {\"" << Name << "\", " << Name << "}"; + return SourceExt; + }; + + SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; + for (auto It = RegisteredKernelNames.begin(), + SecondToLast = RegisteredKernelNames.end() - 1; + It != SecondToLast; ++It) { + EmitEntry(*It) << ",\n"; + } + EmitEntry(RegisteredKernelNames.back()) << "\n"; + SourceExt << ")]];\n"; + } + + auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( + RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), + MRTCBinInfo->MIncludePairs, BuildOptions, LogPtr); + + auto &PM = detail::ProgramManager::getInstance(); + + // Add all binaries and keep the images for processing. + std::vector>>> + NewImages; + NewImages.reserve(Binaries->NumDeviceBinaries); + for (int I = 0; I < Binaries->NumDeviceBinaries; I++) { + sycl_device_binary Binary = &(Binaries->DeviceBinaries[I]); + RTDeviceBinaryImage *NewImage = nullptr; + auto KernelIDs = std::make_shared>(); + PM.addImage(Binary, &NewImage, KernelIDs.get()); + if (NewImage) + NewImages.push_back( + std::make_pair(std::move(NewImage), std::move(KernelIDs))); + } + + // Now bring all images into the proper state. Note that we do this in a + // separate pass over NewImages to make sure dependency images have been + // registered beforehand. + std::vector> Result; + Result.reserve(NewImages.size()); + for (auto &[NewImage, KernelIDs] : NewImages) { + std::set KernelNames; + std::unordered_map MangledKernelNames; + std::unordered_set DeviceGlobalIDSet; + std::vector DeviceGlobalIDVec; + std::vector DeviceGlobalNames; + std::vector> DeviceGlobalAllocations; + + for (const auto &KernelID : *KernelIDs) { + std::string_view KernelName{KernelID.get_name()}; + if (KernelName.find(Prefix) == 0) { + KernelName.remove_prefix(Prefix.length()); + KernelNames.emplace(KernelName); + static constexpr std::string_view SYCLKernelMarker{ + "__sycl_kernel_"}; + if (KernelName.find(SYCLKernelMarker) == 0) { + // extern "C" declaration, implicitly register kernel without the + // marker. + std::string_view KernelNameWithoutMarker{KernelName}; + KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); + MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); + } + } + + for (const sycl_device_binary_property &RKProp : + NewImage->getRegisteredKernels()) { + // Mangled names. + auto BA = DeviceBinaryProperty(RKProp).asByteArray(); + auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; + std::string_view MangledName{ + reinterpret_cast(BA.begin()), MangledNameLen}; + MangledKernelNames.emplace(RKProp->Name, MangledName); + } + + // Device globals. + for (const auto &DeviceGlobalProp : NewImage->getDeviceGlobals()) { + std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; + assert(DeviceGlobalName.find(Prefix) == 0); + bool Inserted = false; + std::tie(std::ignore, Inserted) = + DeviceGlobalIDSet.emplace(DeviceGlobalName); + if (Inserted) { + DeviceGlobalIDVec.emplace_back(DeviceGlobalName); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalNames.emplace_back(DeviceGlobalName); + } + } + } + + // Device globals are usually statically allocated and registered in the + // integration footer, which we don't have in the RTC context. Instead, + // we dynamically allocate storage tied to the executable kernel bundle. + for (DeviceGlobalMapEntry *DeviceGlobalEntry : + PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { + + size_t AllocSize = + DeviceGlobalEntry->MDeviceGlobalTSize; // init value + if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { + // Consider storage for device USM pointer. + AllocSize += sizeof(void *); + } + auto Alloc = std::make_unique(AllocSize); + std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; + PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); + DeviceGlobalAllocations.push_back(std::move(Alloc)); + + // Drop the RTC prefix from the entry's symbol name. Note that the PM + // still manages this device global under its prefixed name. + assert(DeviceGlobalName.find(Prefix) == 0); + DeviceGlobalName.remove_prefix(Prefix.length()); + DeviceGlobalEntry->MUniqueId = DeviceGlobalName; + } + + auto DGRegs = std::make_shared( + ContextImpl, std::string{Prefix}, std::move(DeviceGlobalNames), + std::move(DeviceGlobalAllocations)); + + // Mark the image as input so the program manager will bring it into + // the right state. + auto DevImgImpl = std::make_shared( + NewImage, MContext, Devices, bundle_state::input, + std::move(KernelIDs), MRTCBinInfo->MLanguage, + std::move(KernelNames), std::move(MangledKernelNames), + std::string{Prefix}, std::move(DGRegs)); + + // Resolve dependencies. + // TODO: Consider making a collectDeviceImageDeps variant that takes a + // set reference and inserts into that instead. + std::set ImgDeps; + for (const device &Device : Devices) { + std::set DevImgDeps = + PM.collectDeviceImageDeps(*NewImage, Device); + ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); + } + + // Pack main image and dependencies together. + std::vector NewImageAndDeps; + NewImageAndDeps.reserve(1 + ImgDeps.size()); + NewImageAndDeps.push_back( + createSyclObjFromImpl(std::move(DevImgImpl))); + for (RTDeviceBinaryImage *ImgDep : ImgDeps) + NewImageAndDeps.push_back(PM.createDependencyImage( + MContext, Devices, ImgDep, bundle_state::input)); + + DevImgPlainWithDeps ImgWithDeps(std::move(NewImageAndDeps)); + PM.bringSYCLDeviceImageToState(ImgWithDeps, bundle_state::executable); + Result.push_back(getSyclObjImpl(ImgWithDeps.getMain())); + } + + OutDeviceBins.emplace_back( + std::make_shared(std::move(Binaries))); + return Result; + } + + std::vector DeviceVec; + DeviceVec.reserve(Devices.size()); + for (const auto &SyclDev : Devices) + DeviceVec.push_back(getSyclObjImpl(SyclDev)->getHandleRef()); + + ur_program_handle_t UrProgram = nullptr; + // SourceStrPtr will be null when source is Spir-V bytes. + const std::string *SourceStrPtr = std::get_if(&MBinImage); + bool FetchedFromCache = false; + if (PersistentDeviceCodeCache::isEnabled() && SourceStrPtr) { + FetchedFromCache = extKernelCompilerFetchFromCache( + Devices, BuildOptions, *SourceStrPtr, UrProgram); + } + + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + + if (!FetchedFromCache) { + const auto spirv = [&]() -> std::vector { + switch (MRTCBinInfo->MLanguage) { + case syclex::source_language::opencl: { + // if successful, the log is empty. if failed, throws an error with + // the compilation log. + std::vector IPVersionVec(Devices.size()); + std::transform(DeviceVec.begin(), DeviceVec.end(), + IPVersionVec.begin(), [&](ur_device_handle_t d) { + uint32_t ipVersion = 0; + Adapter->call( + d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), + &ipVersion, nullptr); + return ipVersion; + }); + return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, + BuildOptions, LogPtr); + } + case syclex::source_language::spirv: { + const auto &SourceBytes = std::get>(MBinImage); + std::vector Result(SourceBytes.size()); + std::transform(SourceBytes.cbegin(), SourceBytes.cend(), + Result.begin(), + [](std::byte B) { return static_cast(B); }); + return Result; + } + default: + break; + } + throw sycl::exception( + make_error_code(errc::invalid), + "SYCL C++, OpenCL C and SPIR-V are the only supported " + "languages at this time"); + }(); + + Adapter->call( + ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, + &UrProgram); + // program created by urProgramCreateWithIL is implicitly retained. + if (UrProgram == nullptr) + throw sycl::exception( + sycl::make_error_code(errc::invalid), + "urProgramCreateWithIL resulted in a null program handle."); + + } // if(!FetchedFromCache) + + std::string XsFlags = extractXsFlags(BuildOptions); + auto Res = Adapter->call_nocheck( + UrProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str()); + if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + Res = Adapter->call_nocheck( + ContextImpl->getHandleRef(), UrProgram, XsFlags.c_str()); + } + Adapter->checkUrResult(Res); + + // Get the number of kernels in the program. + size_t NumKernels; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, + nullptr); + + // Get the kernel names. + size_t KernelNamesSize; + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); + + // semi-colon delimited list of kernel names. + std::string KernelNamesStr(KernelNamesSize, ' '); + Adapter->call( + UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), + &KernelNamesStr[0], nullptr); + std::vector KernelNames = + detail::split_string(KernelNamesStr, ';'); + std::set KernelNameSet{KernelNames.begin(), KernelNames.end()}; + + // If caching enabled and kernel not fetched from cache, cache. + if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache && + SourceStrPtr) { + PersistentDeviceCodeCache::putCompiledKernelToDisc( + Devices, syclex::detail::userArgsAsString(BuildOptions), + *SourceStrPtr, UrProgram); + } + return std::vector>{ + std::make_shared( + MContext, Devices, bundle_state::executable, UrProgram, + MRTCBinInfo->MLanguage, std::move(KernelNameSet))}; + } + private: + bool hasRTDeviceBinaryImage() const noexcept { + return std::holds_alternative(MBinImage) && + get_bin_image_ref() != nullptr; + } + + static std::string trimXsFlags(std::string &str) { + // Trim first and last quote if they exist, but no others. + char EncounteredQuote = '\0'; + auto Start = std::find_if(str.begin(), str.end(), [&](char c) { + if (!EncounteredQuote && (c == '\'' || c == '"')) { + EncounteredQuote = c; + return false; + } + return !std::isspace(c); + }); + auto End = std::find_if(str.rbegin(), str.rend(), [&](char c) { + if (c == EncounteredQuote) { + EncounteredQuote = '\0'; + return false; + } + return !std::isspace(c); + }).base(); + if (Start != std::end(str) && End != std::begin(str) && Start < End) { + return std::string(Start, End); + } + + return ""; + } + + static std::string + extractXsFlags(const std::vector &BuildOptions) { + std::stringstream SS; + for (std::string Option : BuildOptions) { + auto Where = Option.find("-Xs"); + if (Where != std::string::npos) { + Where += 3; + std::string Flags = Option.substr(Where); + SS << trimXsFlags(Flags) << " "; + } + } + return SS.str(); + } + + bool + extKernelCompilerFetchFromCache(const std::vector Devices, + const std::vector &BuildOptions, + const std::string &SourceStr, + ur_program_handle_t &UrProgram) const { + const std::shared_ptr &ContextImpl = + getSyclObjImpl(MContext); + const AdapterPtr &Adapter = ContextImpl->getAdapter(); + + std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions); + + std::vector DeviceHandles; + std::transform( + Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), + [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); + + std::vector Binaries; + std::vector Lengths; + std::vector> BinProgs = + PersistentDeviceCodeCache::getCompiledKernelFromDisc(Devices, UserArgs, + SourceStr); + if (BinProgs.empty()) { + return false; + } + for (auto &BinProg : BinProgs) { + Binaries.push_back((uint8_t *)(BinProg.data())); + Lengths.push_back(BinProg.size()); + } + + ur_program_properties_t Properties = {}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; + Properties.pNext = nullptr; + Properties.count = 0; + Properties.pMetadatas = nullptr; + + Adapter->call( + ContextImpl->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), + Lengths.data(), Binaries.data(), &Properties, &UrProgram); + + return true; + } + // Get the specialization constant default value blob. ByteArray getSpecConstsDefValBlob() const { - if (!MBinImage) + if (!hasRTDeviceBinaryImage()) return ByteArray(nullptr, 0); // Get default values for specialization constants. const RTDeviceBinaryImage::PropertyRange &SCDefValRange = - MBinImage->getSpecConstantsDefaultValues(); - if (!SCDefValRange.size()) + get_bin_image_ref()->getSpecConstantsDefaultValues(); + if (!SCDefValRange.isAvailable()) return ByteArray(nullptr, 0); ByteArray DefValDescriptors = @@ -345,9 +1090,9 @@ class device_image_impl { } void updateSpecConstSymMap() { - if (MBinImage) { + if (hasRTDeviceBinaryImage()) { const RTDeviceBinaryImage::PropertyRange &SCRange = - MBinImage->getSpecConstants(); + get_bin_image_ref()->getSpecConstants(); using SCItTy = RTDeviceBinaryImage::PropertyRange::ConstIterator; // This variable is used to calculate spec constant value offset in a @@ -400,7 +1145,9 @@ class device_image_impl { } } - const RTDeviceBinaryImage *MBinImage = nullptr; + const std::variant, + const RTDeviceBinaryImage *> + MBinImage = static_cast(nullptr); context MContext; std::vector MDevices; bundle_state MState; @@ -427,6 +1174,13 @@ class device_image_impl { // Contains map of spec const names to their descriptions + offsets in // the MSpecConstsBlob std::map> MSpecConstSymMap; + + // MOrigins is a bitfield to allow cases where the image is the product of + // merging images of different origins. + uint8_t MOrigins = ImageOriginSYCLOffline; + // Optional information about the binary produced by the kernel compiler + // extension. + std::optional MRTCBinInfo = std::nullopt; }; } // namespace detail diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 91b895cc3ccf9..9b3cc9d638097 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1534,18 +1534,10 @@ void exec_graph_impl::populateURKernelUpdateStructs( std::shared_ptr SyclKernelImpl = nullptr; const sycl::detail::KernelArgMask *EliminatedArgMask = nullptr; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - const auto &KernelName = ExecCG.MKernelName; - kernel_id KernelID = - sycl::detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); + if (auto SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + ExecCG.MKernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Kernel != nullptr) { diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index d8afc90b48d85..e2471b5dbd94b 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -72,17 +72,14 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, const RTDeviceBinaryImage *DeviceImage = nullptr; ur_program_handle_t Program = nullptr; - if (KernelCG->getKernelBundle() != nullptr) { + auto KernelBundleImpl = KernelCG->getKernelBundle(); + if (auto SyclKernelImpl = + KernelBundleImpl + ? KernelBundleImpl->tryGetKernel(KernelName, KernelBundleImpl) + : std::shared_ptr{nullptr}) { // Retrieve the device image from the kernel bundle. - auto KernelBundle = KernelCG->getKernelBundle(); - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - - auto SyclKernel = detail::getSyclObjImpl( - KernelBundle->get_kernel(KernelID, KernelBundle)); - - DeviceImage = SyclKernel->getDeviceImage()->get_bin_image_ref(); - Program = SyclKernel->getDeviceImage()->get_ur_program_ref(); + DeviceImage = SyclKernelImpl->getDeviceImage()->get_bin_image_ref(); + Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else if (KernelCG->MSyclKernel != nullptr) { DeviceImage = KernelCG->MSyclKernel->getDeviceImage()->get_bin_image_ref(); Program = KernelCG->MSyclKernel->getDeviceImage()->get_ur_program_ref(); diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 62972f5546f85..3cac4e5b2001f 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -9,10 +9,7 @@ #pragma once #include -#include -#include #include -#include #include #include #include @@ -98,7 +95,6 @@ class kernel_bundle_impl { make_error_code(errc::invalid), "Not all devices are associated with the context or " "vector of devices is empty"); - MIsInterop = true; } // Interop constructor @@ -307,9 +303,11 @@ class kernel_bundle_impl { } for (const detail::KernelBundleImplPtr &Bundle : Bundles) { - MDeviceImages.insert(MDeviceImages.end(), Bundle->MDeviceImages.begin(), Bundle->MDeviceImages.end()); + MSharedDeviceBinaries.insert(MSharedDeviceBinaries.end(), + Bundle->MSharedDeviceBinaries.begin(), + Bundle->MSharedDeviceBinaries.end()); } fillUniqueDeviceImages(); @@ -347,142 +345,46 @@ class kernel_bundle_impl { } } - using include_pairs_t = - std::vector>; // oneapi_ext_kernel_compiler // construct from source string kernel_bundle_impl(const context &Context, syclex::source_language Lang, const std::string &Src, include_pairs_t IncludePairsVec) : MContext(Context), MDevices(Context.get_devices()), - MState(bundle_state::ext_oneapi_source), MLanguage(Lang), MSource(Src), - MIncludePairs(IncludePairsVec) {} + MDeviceImages{device_image_plain{std::make_shared( + Src, MContext, MDevices, Lang, std::move(IncludePairsVec))}}, + MUniqueDeviceImages{MDeviceImages[0].getMain()}, + MState(bundle_state::ext_oneapi_source) { + common_ctor_checks(); + } // oneapi_ext_kernel_compiler // construct from source bytes kernel_bundle_impl(const context &Context, syclex::source_language Lang, const std::vector &Bytes) : MContext(Context), MDevices(Context.get_devices()), - MState(bundle_state::ext_oneapi_source), MLanguage(Lang), - MSource(Bytes) {} - - // oneapi_ext_kernel_compiler - // interop constructor - kernel_bundle_impl(context Ctx, std::vector Devs, - device_image_plain &DevImage, - std::vector KNames, - syclex::source_language Lang) - : kernel_bundle_impl(Ctx, Devs, DevImage) { - MState = bundle_state::executable; - MKernelNames = std::move(KNames); - MLanguage = Lang; + MDeviceImages{device_image_plain{std::make_shared( + Bytes, MContext, MDevices, Lang)}}, + MUniqueDeviceImages{MDeviceImages[0].getMain()}, + MState(bundle_state::ext_oneapi_source) { + common_ctor_checks(); } // oneapi_ext_kernel_compiler - // program manager integration, only for sycl language + // construct from built source files kernel_bundle_impl( - context Ctx, std::vector Devs, - const std::vector &KernelIDs, - std::vector &&KernelNames, - std::unordered_map &&MangledKernelNames, - std::vector &&DeviceGlobalNames, - std::vector> &&DeviceGlobalAllocations, - sycl_device_binaries Binaries, std::string &&Prefix, - syclex::source_language Lang) - : kernel_bundle_impl(std::move(Ctx), std::move(Devs), KernelIDs, - bundle_state::executable) { - assert(Lang == syclex::source_language::sycl); - // Mark this bundle explicitly as "interop" to ensure that its kernels are - // enqueued with the info from the kernel object passed by the application, - // cf. `enqueueImpKernel` in `commands.cpp`. While runtime-compiled kernels - // loaded via the program manager have `kernel_id`s, they can't be looked up - // from the (unprefixed) kernel name. - MIsInterop = true; - MKernelNames = std::move(KernelNames); - MMangledKernelNames = std::move(MangledKernelNames); - MDeviceGlobalNames = std::move(DeviceGlobalNames); - MDeviceGlobalAllocations = std::move(DeviceGlobalAllocations); - MDeviceBinaries = Binaries; - MPrefix = std::move(Prefix); - MLanguage = Lang; - } - - std::string trimXsFlags(std::string &str) { - // Trim first and last quote if they exist, but no others. - char EncounteredQuote = '\0'; - auto Start = std::find_if(str.begin(), str.end(), [&](char c) { - if (!EncounteredQuote && (c == '\'' || c == '"')) { - EncounteredQuote = c; - return false; - } - return !std::isspace(c); - }); - auto End = std::find_if(str.rbegin(), str.rend(), [&](char c) { - if (c == EncounteredQuote) { - EncounteredQuote = '\0'; - return false; - } - return !std::isspace(c); - }).base(); - if (Start != std::end(str) && End != std::begin(str) && Start < End) { - return std::string(Start, End); - } - - return ""; - } - - std::string extractXsFlags(const std::vector &BuildOptions) { - std::stringstream SS; - for (std::string Option : BuildOptions) { - auto Where = Option.find("-Xs"); - if (Where != std::string::npos) { - Where += 3; - std::string Flags = Option.substr(Where); - SS << trimXsFlags(Flags) << " "; - } - } - return SS.str(); - } - - bool - extKernelCompilerFetchFromCache(const std::vector Devices, - const std::vector &BuildOptions, - const std::string &SourceStr, - ur_program_handle_t &UrProgram) { - using ContextImplPtr = std::shared_ptr; - const ContextImplPtr &ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - - std::string UserArgs = syclex::detail::userArgsAsString(BuildOptions); - - std::vector DeviceHandles; - std::transform( - Devices.begin(), Devices.end(), std::back_inserter(DeviceHandles), - [](const device &Dev) { return getSyclObjImpl(Dev)->getHandleRef(); }); - - std::vector Binaries; - std::vector Lengths; - std::vector> BinProgs = - PersistentDeviceCodeCache::getCompiledKernelFromDisc(Devices, UserArgs, - SourceStr); - if (BinProgs.empty()) { - return false; - } - for (auto &BinProg : BinProgs) { - Binaries.push_back((uint8_t *)(BinProg.data())); - Lengths.push_back(BinProg.size()); - } - - ur_program_properties_t Properties = {}; - Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES; - Properties.pNext = nullptr; - Properties.count = 0; - Properties.pMetadatas = nullptr; - - Adapter->call( - ContextImpl->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), - Lengths.data(), Binaries.data(), &Properties, &UrProgram); + const context &Context, const std::vector &Devs, + std::vector &&DevImgs, + std::vector> &&DevBinaries) + : MContext(Context), MDevices(Devs), + MSharedDeviceBinaries(std::move(DevBinaries)), + MUniqueDeviceImages(std::move(DevImgs)), + MState(bundle_state::executable) { + common_ctor_checks(); - return true; + removeDuplicateImages(); + MDeviceImages.reserve(MUniqueDeviceImages.size()); + for (const device_image_plain &DevImg : MUniqueDeviceImages) + MDeviceImages.emplace_back(DevImg); } std::shared_ptr @@ -492,393 +394,86 @@ class kernel_bundle_impl { const std::vector &RegisteredKernelNames) { assert(MState == bundle_state::ext_oneapi_source && "bundle_state::ext_oneapi_source required"); - - using ContextImplPtr = std::shared_ptr; - const ContextImplPtr &ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - - std::vector DeviceVec; - DeviceVec.reserve(Devices.size()); - for (const auto &SyclDev : Devices) { - const DeviceImplPtr &DevImpl = getSyclObjImpl(SyclDev); - if (!ContextImpl->hasDevice(DevImpl)) { - throw sycl::exception(make_error_code(errc::invalid), - "device not part of kernel_bundle context"); - } - if (!DevImpl->extOneapiCanCompile(MLanguage)) { - // This error cannot not be exercised in the current implementation, as - // compatibility with a source language depends on the backend's - // capabilities and all devices in one context share the same backend in - // the current implementation, so this would lead to an error already - // during construction of the source bundle. - throw sycl::exception(make_error_code(errc::invalid), - "device does not support source language"); - } - ur_device_handle_t Dev = DevImpl->getHandleRef(); - DeviceVec.push_back(Dev); - } - - if (MLanguage == syclex::source_language::sycl) { - // Build device images via the program manager. - const std::string &SourceStr = std::get(MSource); - std::ostringstream SourceExt; - if (!RegisteredKernelNames.empty()) { - SourceExt << SourceStr << '\n'; - - auto EmitEntry = - [&SourceExt](const std::string &Name) -> std::ostringstream & { - SourceExt << " {\"" << Name << "\", " << Name << "}"; - return SourceExt; - }; - - SourceExt << "[[__sycl_detail__::__registered_kernels__(\n"; - for (auto It = RegisteredKernelNames.begin(), - SecondToLast = RegisteredKernelNames.end() - 1; - It != SecondToLast; ++It) { - EmitEntry(*It) << ",\n"; - } - EmitEntry(RegisteredKernelNames.back()) << "\n"; - SourceExt << ")]];\n"; - } - - auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_Compile( - RegisteredKernelNames.empty() ? SourceStr : SourceExt.str(), - MIncludePairs, BuildOptions, LogPtr); - - auto &PM = detail::ProgramManager::getInstance(); - PM.addImages(Binaries); - - std::vector KernelIDs; - std::vector KernelNames; - std::unordered_map MangledKernelNames; - - std::unordered_set DeviceGlobalIDSet; - std::vector DeviceGlobalIDVec; - std::vector DeviceGlobalNames; - std::vector> DeviceGlobalAllocations; - - for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { - std::string_view KernelName{KernelID.get_name()}; - if (KernelName.find(Prefix) == 0) { - KernelIDs.push_back(KernelID); - KernelName.remove_prefix(Prefix.length()); - KernelNames.emplace_back(KernelName); - static constexpr std::string_view SYCLKernelMarker{"__sycl_kernel_"}; - if (KernelName.find(SYCLKernelMarker) == 0) { - // extern "C" declaration, implicitly register kernel without the - // marker. - std::string_view KernelNameWithoutMarker{KernelName}; - KernelNameWithoutMarker.remove_prefix(SYCLKernelMarker.length()); - MangledKernelNames.emplace(KernelNameWithoutMarker, KernelName); - } - } - } - - for (const auto *RawImg : PM.getRawDeviceImages(KernelIDs)) { - // Mangled names. - for (const sycl_device_binary_property &RKProp : - RawImg->getRegisteredKernels()) { - - auto BA = DeviceBinaryProperty(RKProp).asByteArray(); - auto MangledNameLen = BA.consume() / 8 /*bits in a byte*/; - std::string_view MangledName{ - reinterpret_cast(BA.begin()), MangledNameLen}; - MangledKernelNames.emplace(RKProp->Name, MangledName); - } - - // Device globals. - for (const auto &DeviceGlobalProp : RawImg->getDeviceGlobals()) { - std::string_view DeviceGlobalName{DeviceGlobalProp->Name}; - assert(DeviceGlobalName.find(Prefix) == 0); - bool Inserted = false; - std::tie(std::ignore, Inserted) = - DeviceGlobalIDSet.emplace(DeviceGlobalName); - if (Inserted) { - DeviceGlobalIDVec.emplace_back(DeviceGlobalName); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalNames.emplace_back(DeviceGlobalName); - } - } - } - - // Device globals are usually statically allocated and registered in the - // integration footer, which we don't have in the RTC context. Instead, we - // dynamically allocate storage tied to the executable kernel bundle. - for (DeviceGlobalMapEntry *DeviceGlobalEntry : - PM.getDeviceGlobalEntries(DeviceGlobalIDVec)) { - - size_t AllocSize = DeviceGlobalEntry->MDeviceGlobalTSize; // init value - if (!DeviceGlobalEntry->MIsDeviceImageScopeDecorated) { - // Consider storage for device USM pointer. - AllocSize += sizeof(void *); - } - auto Alloc = std::make_unique(AllocSize); - std::string_view DeviceGlobalName{DeviceGlobalEntry->MUniqueId}; - PM.addOrInitDeviceGlobalEntry(Alloc.get(), DeviceGlobalName.data()); - DeviceGlobalAllocations.push_back(std::move(Alloc)); - - // Drop the RTC prefix from the entry's symbol name. Note that the PM - // still manages this device global under its prefixed name. - assert(DeviceGlobalName.find(Prefix) == 0); - DeviceGlobalName.remove_prefix(Prefix.length()); - DeviceGlobalEntry->MUniqueId = DeviceGlobalName; - } - - return std::make_shared( - MContext, MDevices, KernelIDs, std::move(KernelNames), - std::move(MangledKernelNames), std::move(DeviceGlobalNames), - std::move(DeviceGlobalAllocations), Binaries, std::move(Prefix), - MLanguage); - } - - ur_program_handle_t UrProgram = nullptr; - // SourceStrPtr will be null when source is Spir-V bytes. - const std::string *SourceStrPtr = std::get_if(&MSource); - bool FetchedFromCache = false; - if (PersistentDeviceCodeCache::isEnabled() && SourceStrPtr) { - FetchedFromCache = extKernelCompilerFetchFromCache( - Devices, BuildOptions, *SourceStrPtr, UrProgram); - } - - if (!FetchedFromCache) { - const auto spirv = [&]() -> std::vector { - if (MLanguage == syclex::source_language::opencl) { - // if successful, the log is empty. if failed, throws an error with - // the compilation log. - std::vector IPVersionVec(Devices.size()); - std::transform(DeviceVec.begin(), DeviceVec.end(), - IPVersionVec.begin(), [&](ur_device_handle_t d) { - uint32_t ipVersion = 0; - Adapter->call( - d, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), - &ipVersion, nullptr); - return ipVersion; - }); - return syclex::detail::OpenCLC_to_SPIRV(*SourceStrPtr, IPVersionVec, - BuildOptions, LogPtr); - } - if (MLanguage == syclex::source_language::spirv) { - const auto &SourceBytes = std::get>(MSource); - std::vector Result(SourceBytes.size()); - std::transform(SourceBytes.cbegin(), SourceBytes.cend(), - Result.begin(), - [](std::byte B) { return static_cast(B); }); - return Result; - } - throw sycl::exception( - make_error_code(errc::invalid), - "SYCL C++, OpenCL C and SPIR-V are the only supported " - "languages at this time"); - }(); - - Adapter->call( - ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, - &UrProgram); - // program created by urProgramCreateWithIL is implicitly retained. - if (UrProgram == nullptr) - throw sycl::exception( - sycl::make_error_code(errc::invalid), - "urProgramCreateWithIL resulted in a null program handle."); - - } // if(!FetchedFromCache) - - std::string XsFlags = extractXsFlags(BuildOptions); - auto Res = Adapter->call_nocheck( - UrProgram, DeviceVec.size(), DeviceVec.data(), XsFlags.c_str()); - if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - Res = Adapter->call_nocheck( - ContextImpl->getHandleRef(), UrProgram, XsFlags.c_str()); - } - Adapter->checkUrResult(Res); - - // Get the number of kernels in the program. - size_t NumKernels; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_NUM_KERNELS, sizeof(size_t), &NumKernels, - nullptr); - - // Get the kernel names. - size_t KernelNamesSize; - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &KernelNamesSize); - - // semi-colon delimited list of kernel names. - std::string KernelNamesStr(KernelNamesSize, ' '); - Adapter->call( - UrProgram, UR_PROGRAM_INFO_KERNEL_NAMES, KernelNamesStr.size(), - &KernelNamesStr[0], nullptr); - std::vector KernelNames = - detail::split_string(KernelNamesStr, ';'); - - // make the device image and the kernel_bundle_impl - auto KernelIDs = std::make_shared>(); - auto DevImgImpl = std::make_shared( - nullptr, MContext, MDevices, bundle_state::executable, KernelIDs, - UrProgram); - device_image_plain DevImg{DevImgImpl}; - - // If caching enabled and kernel not fetched from cache, cache. - if (PersistentDeviceCodeCache::isEnabled() && !FetchedFromCache && - SourceStrPtr) { - PersistentDeviceCodeCache::putCompiledKernelToDisc( - Devices, syclex::detail::userArgsAsString(BuildOptions), - *SourceStrPtr, UrProgram); - } - - return std::make_shared(MContext, MDevices, DevImg, - KernelNames, MLanguage); - } - - // Utility methods for kernel_compiler functionality -private: - std::string adjust_kernel_name(const std::string &Name) { - if (MLanguage == syclex::source_language::sycl) { - auto It = MMangledKernelNames.find(Name); - return It == MMangledKernelNames.end() ? Name : It->second; - } - - return Name; - } - - bool is_kernel_name(const std::string &Name) { - return std::find(MKernelNames.begin(), MKernelNames.end(), Name) != - MKernelNames.end(); - } - - std::string mangle_device_global_name(const std::string &Name) { - // TODO: Support device globals declared in namespaces. - return "_Z" + std::to_string(Name.length()) + Name; - } - - DeviceGlobalMapEntry *get_device_global_entry(const std::string &Name) { - if (MKernelNames.empty() || MLanguage != syclex::source_language::sycl) { - throw sycl::exception(make_error_code(errc::invalid), - "Querying device globals by name is only available " - "in kernel_bundles successfully built from " - "kernel_bundle::ext_oneapi_source> " - "with 'sycl' source language."); - } - - if (!ext_oneapi_has_device_global(Name)) { - throw sycl::exception(make_error_code(errc::invalid), - "device global '" + Name + - "' not found in kernel_bundle"); - } - - std::vector Entries = - ProgramManager::getInstance().getDeviceGlobalEntries( - {MPrefix + mangle_device_global_name(Name)}); - assert(Entries.size() == 1); - return Entries.front(); - } - - void unregister_device_globals_from_context() { - if (MDeviceGlobalNames.empty()) - return; - - // Manually trigger the release of resources for all device global map - // entries associated with this runtime-compiled bundle. Normally, this - // would happen in `~context_impl()`, however in the RTC setting, the - // context outlives the DG map entries owned by the program manager. - - std::vector DeviceGlobalIDs; - std::transform(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), - std::back_inserter(DeviceGlobalIDs), - [&](const std::string &DGName) { return MPrefix + DGName; }); - const auto &ContextImpl = getSyclObjImpl(MContext); - for (DeviceGlobalMapEntry *Entry : - ProgramManager::getInstance().getDeviceGlobalEntries( - DeviceGlobalIDs)) { - Entry->removeAssociatedResources(ContextImpl.get()); - ContextImpl->removeAssociatedDeviceGlobal(Entry->MDeviceGlobalPtr); + assert(allSourceBasedImages() && "All images must be source-based."); + + std::vector NewDevImgs; + std::vector> NewBinReso; + for (device_image_plain &DevImg : MUniqueDeviceImages) { + std::vector> NewDevImgImpls = + getSyclObjImpl(DevImg)->buildFromSource( + Devices, BuildOptions, LogPtr, RegisteredKernelNames, NewBinReso); + NewDevImgs.reserve(NewDevImgImpls.size()); + for (std::shared_ptr &DevImgImpl : NewDevImgImpls) + NewDevImgs.emplace_back(std::move(DevImgImpl)); } + return std::make_shared( + MContext, Devices, std::move(NewDevImgs), std::move(NewBinReso)); } public: - bool ext_oneapi_has_kernel(const std::string &Name) { - return !MKernelNames.empty() && is_kernel_name(adjust_kernel_name(Name)); + bool ext_oneapi_has_kernel(const std::string &Name) const { + return std::any_of(begin(), end(), + [&Name](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->hasKernelName(Name); + }); } kernel ext_oneapi_get_kernel(const std::string &Name, - const std::shared_ptr &Self) { - if (MKernelNames.empty()) + const std::shared_ptr &Self) const { + if (!hasSourceBasedImages()) throw sycl::exception(make_error_code(errc::invalid), "'ext_oneapi_get_kernel' is only available in " "kernel_bundles successfully built from " "kernel_bundle."); - std::string AdjustedName = adjust_kernel_name(Name); - if (!is_kernel_name(AdjustedName)) - throw sycl::exception(make_error_code(errc::invalid), - "kernel '" + Name + "' not found in kernel_bundle"); - - if (MLanguage == syclex::source_language::sycl) { - auto &PM = ProgramManager::getInstance(); - auto KID = PM.getSYCLKernelID(MPrefix + AdjustedName); - - for (const auto &DevImgWithDeps : MDeviceImages) { - const auto &DevImg = DevImgWithDeps.getMain(); - if (!DevImg.has_kernel(KID)) - continue; - - const auto &DevImgImpl = getSyclObjImpl(DevImg); - auto UrProgram = DevImgImpl->get_ur_program_ref(); - auto [UrKernel, CacheMutex, ArgMask] = - PM.getOrCreateKernel(MContext, AdjustedName, - /*PropList=*/{}, UrProgram); - auto KernelImpl = std::make_shared( - UrKernel, getSyclObjImpl(MContext), DevImgImpl, Self, ArgMask, - UrProgram, CacheMutex); - return createSyclObjFromImpl(std::move(KernelImpl)); - } - - assert(false && "Malformed RTC kernel bundle"); + // TODO: When linking is properly implemented for kernel compiler binaries, + // there can be scenarios where multiple binaries have the same + // kernels. In this case, all these bundles should be found and the + // resulting kernel object should be able to map devices to their + // respective backend kernel objects. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const std::shared_ptr &DevImgImpl = + getSyclObjImpl(DevImg); + if (std::shared_ptr PotentialKernelImpl = + DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, + DevImgImpl)) + return detail::createSyclObjFromImpl( + std::move(PotentialKernelImpl)); } - - assert(MDeviceImages.size() > 0); - const std::shared_ptr &DeviceImageImpl = - detail::getSyclObjImpl(MDeviceImages[0].getMain()); - ur_program_handle_t UrProgram = DeviceImageImpl->get_ur_program_ref(); - const ContextImplPtr &ContextImpl = getSyclObjImpl(MContext); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - ur_kernel_handle_t UrKernel = nullptr; - Adapter->call(UrProgram, AdjustedName.c_str(), - &UrKernel); - // Kernel created by urKernelCreate is implicitly retained. - - std::shared_ptr KernelImpl = std::make_shared( - UrKernel, detail::getSyclObjImpl(MContext), Self); - - return detail::createSyclObjFromImpl(std::move(KernelImpl)); + throw sycl::exception(make_error_code(errc::invalid), + "kernel '" + Name + "' not found in kernel_bundle"); } std::string ext_oneapi_get_raw_kernel_name(const std::string &Name) { - if (MKernelNames.empty()) + if (!hasSourceBasedImages()) throw sycl::exception( make_error_code(errc::invalid), "'ext_oneapi_get_raw_kernel_name' is only available in " "kernel_bundles successfully built from " "kernel_bundle."); - std::string AdjustedName = adjust_kernel_name(Name); - if (!is_kernel_name(AdjustedName)) + auto It = + std::find_if(begin(), end(), [&Name](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->hasKernelName(Name); + }); + if (It == end()) throw sycl::exception(make_error_code(errc::invalid), "kernel '" + Name + "' not found in kernel_bundle"); - return AdjustedName; + return getSyclObjImpl(*It)->adjustKernelName(Name); } - bool ext_oneapi_has_device_global(const std::string &Name) { - return !MDeviceGlobalNames.empty() && - std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), - mangle_device_global_name(Name)) != - MDeviceGlobalNames.end(); + bool ext_oneapi_has_device_global(const std::string &Name) const { + return std::any_of( + begin(), end(), [&Name](const device_image_plain &DeviceImage) { + return getSyclObjImpl(DeviceImage)->hasDeviceGlobalName(Name); + }); } void *ext_oneapi_get_device_global_address(const std::string &Name, - const device &Dev) { - DeviceGlobalMapEntry *Entry = get_device_global_entry(Name); + const device &Dev) const { + DeviceGlobalMapEntry *Entry = getDeviceGlobalEntry(Name); if (std::find(MDevices.begin(), MDevices.end(), Dev) == MDevices.end()) { throw sycl::exception(make_error_code(errc::invalid), @@ -900,8 +495,8 @@ class kernel_bundle_impl { return USMMem.getPtr(); } - size_t ext_oneapi_get_device_global_size(const std::string &Name) { - return get_device_global_entry(Name)->MDeviceGlobalTSize; + size_t ext_oneapi_get_device_global_size(const std::string &Name) const { + return getDeviceGlobalEntry(Name)->MDeviceGlobalTSize; } bool empty() const noexcept { return MDeviceImages.empty(); } @@ -915,17 +510,17 @@ class kernel_bundle_impl { const std::vector &get_devices() const noexcept { return MDevices; } std::vector get_kernel_ids() const { - // RTC kernel bundles shouldn't have user-facing kernel ids, return an - // empty vector when the bundle contains RTC kernels. - if (MLanguage == syclex::source_language::sycl) { - return {}; - } // Collect kernel ids from all device images, then remove duplicates - std::vector Result; for (const device_image_plain &DeviceImage : MUniqueDeviceImages) { - const std::vector &KernelIDs = - getSyclObjImpl(DeviceImage)->get_kernel_ids(); + const auto &DevImgImpl = getSyclObjImpl(DeviceImage); + + // RTC kernel bundles shouldn't have user-facing kernel ids, return an + // empty vector when the bundle contains RTC kernels. + if (DevImgImpl->getRTCInfo()) + continue; + + const std::vector &KernelIDs = DevImgImpl->get_kernel_ids(); Result.insert(Result.end(), KernelIDs.begin(), KernelIDs.end()); } @@ -940,71 +535,12 @@ class kernel_bundle_impl { kernel get_kernel(const kernel_id &KernelID, const std::shared_ptr &Self) const { - using ImageImpl = std::shared_ptr; - // Selected image. - ImageImpl SelectedImage = nullptr; - // Image where specialization constants are replaced with default values. - ImageImpl ImageWithReplacedSpecConsts = nullptr; - // Original image where specialization constants are not replaced with - // default values. - ImageImpl OriginalImage = nullptr; - // Used to track if any of the candidate images has specialization values - // set. - bool SpecConstsSet = false; - for (const DevImgPlainWithDeps &DeviceImageWithDeps : MDeviceImages) { - const device_image_plain &DeviceImage = DeviceImageWithDeps.getMain(); - if (!DeviceImageWithDeps.getMain().has_kernel(KernelID)) - continue; - - const auto DeviceImageImpl = detail::getSyclObjImpl(DeviceImage); - SpecConstsSet |= DeviceImageImpl->is_any_specialization_constant_set(); - - // Remember current image in corresponding variable depending on whether - // specialization constants are replaced with default value or not. - (DeviceImageImpl->specialization_constants_replaced_with_default() - ? ImageWithReplacedSpecConsts - : OriginalImage) = DeviceImageImpl; - - if (SpecConstsSet) { - // If specialization constant is set in any of the candidate images - // then we can't use ReplacedImage, so we select NativeImage if any or - // we select OriginalImage and keep iterating in case there is an image - // with native support. - SelectedImage = OriginalImage; - if (SelectedImage && - SelectedImage->all_specialization_constant_native()) - break; - } else { - // For now select ReplacedImage but it may be reset if any of the - // further device images has specialization constant value set. If after - // all iterations specialization constant values are not set in any of - // the candidate images then that will be the selected image. - // Also we don't want to use ReplacedImage if device image has native - // support. - if (ImageWithReplacedSpecConsts && - !ImageWithReplacedSpecConsts->all_specialization_constant_native()) - SelectedImage = ImageWithReplacedSpecConsts; - else - // In case if we don't have or don't use ReplacedImage. - SelectedImage = OriginalImage; - } - } - - if (!SelectedImage) - throw sycl::exception(make_error_code(errc::invalid), - "The kernel bundle does not contain the kernel " - "identified by kernelId."); - - auto [Kernel, CacheMutex, ArgMask] = - detail::ProgramManager::getInstance().getOrCreateKernel( - MContext, KernelID.get_name(), /*PropList=*/{}, - SelectedImage->get_ur_program_ref()); - - std::shared_ptr KernelImpl = std::make_shared( - Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, - SelectedImage->get_ur_program_ref(), CacheMutex); - - return detail::createSyclObjFromImpl(std::move(KernelImpl)); + if (std::shared_ptr KernelImpl = + tryGetOfflineKernel(KernelID, Self)) + return detail::createSyclObjFromImpl(std::move(KernelImpl)); + throw sycl::exception(make_error_code(errc::invalid), + "The kernel bundle does not contain the kernel " + "identified by kernelId."); } bool has_kernel(const kernel_id &KernelID) const noexcept { @@ -1106,8 +642,6 @@ class kernel_bundle_impl { return MSpecConstValues; } - bool isInterop() const { return MIsInterop; } - bool add_kernel(const kernel_id &KernelID, const device &Dev) { // Skip if kernel is already there if (has_kernel(KernelID, Dev)) @@ -1137,19 +671,139 @@ class kernel_bundle_impl { return true; } - ~kernel_bundle_impl() { - try { - if (MDeviceBinaries) { - unregister_device_globals_from_context(); - ProgramManager::getInstance().removeImages(MDeviceBinaries); - syclex::detail::SYCL_JIT_Destroy(MDeviceBinaries); + bool hasSourceBasedImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & + ImageOriginKernelCompiler; + }); + } + + bool hasSYCLOfflineImages() const noexcept { + return std::any_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & ImageOriginSYCLOffline; + }); + } + + bool allSourceBasedImages() const noexcept { + return std::all_of(begin(), end(), [](const device_image_plain &DevImg) { + return getSyclObjImpl(DevImg)->getOriginMask() & + ImageOriginKernelCompiler; + }); + } + + std::shared_ptr tryGetOfflineKernel( + const kernel_id &KernelID, + const std::shared_ptr &Self) const { + using ImageImpl = std::shared_ptr; + // Selected image. + ImageImpl SelectedImage = nullptr; + // Image where specialization constants are replaced with default values. + ImageImpl ImageWithReplacedSpecConsts = nullptr; + // Original image where specialization constants are not replaced with + // default values. + ImageImpl OriginalImage = nullptr; + // Used to track if any of the candidate images has specialization values + // set. + bool SpecConstsSet = false; + for (const DevImgPlainWithDeps &DeviceImageWithDeps : MDeviceImages) { + const device_image_plain &DeviceImage = DeviceImageWithDeps.getMain(); + if (!DeviceImageWithDeps.getMain().has_kernel(KernelID)) + continue; + + const auto DeviceImageImpl = detail::getSyclObjImpl(DeviceImage); + SpecConstsSet |= DeviceImageImpl->is_any_specialization_constant_set(); + + // Remember current image in corresponding variable depending on whether + // specialization constants are replaced with default value or not. + (DeviceImageImpl->specialization_constants_replaced_with_default() + ? ImageWithReplacedSpecConsts + : OriginalImage) = DeviceImageImpl; + + if (SpecConstsSet) { + // If specialization constant is set in any of the candidate images + // then we can't use ReplacedImage, so we select NativeImage if any or + // we select OriginalImage and keep iterating in case there is an image + // with native support. + SelectedImage = OriginalImage; + if (SelectedImage && + SelectedImage->all_specialization_constant_native()) + break; + } else { + // For now select ReplacedImage but it may be reset if any of the + // further device images has specialization constant value set. If after + // all iterations specialization constant values are not set in any of + // the candidate images then that will be the selected image. + // Also we don't want to use ReplacedImage if device image has native + // support. + if (ImageWithReplacedSpecConsts && + !ImageWithReplacedSpecConsts->all_specialization_constant_native()) + SelectedImage = ImageWithReplacedSpecConsts; + else + // In case if we don't have or don't use ReplacedImage. + SelectedImage = OriginalImage; } - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~kernel_bundle_impl", e); } + + if (!SelectedImage) + return nullptr; + + auto [Kernel, CacheMutex, ArgMask] = + detail::ProgramManager::getInstance().getOrCreateKernel( + MContext, KernelID.get_name(), /*PropList=*/{}, + SelectedImage->get_ur_program_ref()); + + return std::make_shared( + Kernel, detail::getSyclObjImpl(MContext), SelectedImage, Self, ArgMask, + SelectedImage->get_ur_program_ref(), CacheMutex); + } + + std::shared_ptr + tryGetKernel(const std::string &Name, + const std::shared_ptr &Self) const { + // TODO: For source-based kernels, it may be faster to keep a map between + // {kernel_name, device} and their corresponding image. + // First look through the kernels registered in source-based images. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const std::shared_ptr &DevImgImpl = + getSyclObjImpl(DevImg); + if (std::shared_ptr SourceBasedKernel = + DevImgImpl->tryGetSourceBasedKernel(Name, MContext, Self, + DevImgImpl)) + return SourceBasedKernel; + } + + // Fall back to regular offline compiled kernel_bundle look-up. + if (std::optional MaybeKernelID = + sycl::detail::ProgramManager::getInstance().tryGetSYCLKernelID( + Name)) + return tryGetOfflineKernel(*MaybeKernelID, Self); + return nullptr; } private: + DeviceGlobalMapEntry *getDeviceGlobalEntry(const std::string &Name) const { + if (!hasSourceBasedImages()) { + throw sycl::exception(make_error_code(errc::invalid), + "Querying device globals by name is only available " + "in kernel_bundles successfully built from " + "kernel_bundle::ext_oneapi_source> " + "with 'sycl' source language."); + } + + if (!ext_oneapi_has_device_global(Name)) { + throw sycl::exception(make_error_code(errc::invalid), + "device global '" + Name + + "' not found in kernel_bundle"); + } + + for (const device_image_plain &DevImg : MUniqueDeviceImages) + if (DeviceGlobalMapEntry *Entry = + getSyclObjImpl(DevImg)->tryGetDeviceGlobalEntry(Name)) + return Entry; + assert(false && "Device global should have been found."); + return nullptr; + } + void fillUniqueDeviceImages() { assert(MUniqueDeviceImages.empty()); for (const DevImgPlainWithDeps &Imgs : MDeviceImages) @@ -1164,28 +818,22 @@ class kernel_bundle_impl { std::unique(MUniqueDeviceImages.begin(), MUniqueDeviceImages.end()); MUniqueDeviceImages.erase(It, MUniqueDeviceImages.end()); } + context MContext; std::vector MDevices; + + // For sycl_jit, building from source may have produced sycl binaries that + // the kernel_bundles now manage. + // NOTE: This must appear before device images to enforce their freeing of + // device globals prior to unregistering the binaries. + std::vector> MSharedDeviceBinaries; + std::vector MDeviceImages; std::vector MUniqueDeviceImages; // This map stores values for specialization constants, that are missing // from any device image. SpecConstMapT MSpecConstValues; - bool MIsInterop = false; bundle_state MState; - - // ext_oneapi_kernel_compiler : Source, Languauge, KernelNames, IncludePairs - // Language is for both state::source and state::executable. - syclex::source_language MLanguage = syclex::source_language::opencl; - const std::variant> MSource; - // only kernel_bundles created from source have KernelNames member. - std::vector MKernelNames; - std::unordered_map MMangledKernelNames; - std::vector MDeviceGlobalNames; - std::vector> MDeviceGlobalAllocations; - sycl_device_binaries MDeviceBinaries = nullptr; - std::string MPrefix; - include_pairs_t MIncludePairs; }; } // namespace detail diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 3bbff52ae4f2b..8e45fa0ef0555 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -34,14 +34,7 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, "Input context must be the same as the context of cl_kernel"); // Enable USM indirect access for interoperability kernels. - // Some UR Adapters (like OpenCL) require this call to enable USM - // For others, UR will turn this into a NOP. - if (Context->getPlatformImpl()->supports_usm()) { - bool EnableAccess = true; - getAdapter()->call( - MKernel, UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), - nullptr, &EnableAccess); - } + enableUSMIndirectAccess(); } kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, @@ -50,10 +43,16 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, const KernelArgMask *ArgMask, ur_program_handle_t Program, std::mutex *CacheMutex) : MKernel(Kernel), MContext(std::move(ContextImpl)), MProgram(Program), - MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)), + MCreatedFromSource(DeviceImageImpl->isNonSYCLSourceBased()), + MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), + MIsInterop(MDeviceImageImpl->getOriginMask() & ImageOriginInterop), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { - MIsInterop = MKernelBundleImpl->isInterop(); + // Enable USM indirect access for interop and non-sycl-jit source kernels. + // sycl-jit kernels will enable this if needed through the regular kernel + // path. + if (MCreatedFromSource || MIsInterop) + enableUSMIndirectAccess(); } kernel_impl::~kernel_impl() { @@ -81,6 +80,19 @@ bool kernel_impl::isCreatedFromSource() const { return MCreatedFromSource; } +bool kernel_impl::isInteropOrSourceBased() const noexcept { + return isInterop() || + (MDeviceImageImpl && + (MDeviceImageImpl->getOriginMask() & ImageOriginKernelCompiler)); +} + +bool kernel_impl::hasSYCLMetadata() const noexcept { + return !isInteropOrSourceBased() || + (MDeviceImageImpl && + MDeviceImageImpl->isFromSourceLanguage( + sycl::ext::oneapi::experimental::source_language::sycl)); +} + bool kernel_impl::isBuiltInKernel(const device &Device) const { auto BuiltInKernels = Device.get_info(); if (BuiltInKernels.empty()) @@ -92,7 +104,7 @@ bool kernel_impl::isBuiltInKernel(const device &Device) const { } void kernel_impl::checkIfValidForNumArgsInfoQuery() const { - if (MKernelBundleImpl->isInterop()) + if (isInteropOrSourceBased()) return; auto Devices = MKernelBundleImpl->get_devices(); if (std::any_of(Devices.begin(), Devices.end(), @@ -106,6 +118,18 @@ void kernel_impl::checkIfValidForNumArgsInfoQuery() const { "interoperability function or to query a device built-in kernel"); } +void kernel_impl::enableUSMIndirectAccess() const { + if (!MContext->getPlatformImpl()->supports_usm()) + return; + + // Some UR Adapters (like OpenCL) require this call to enable USM + // For others, UR will turn this into a NOP. + bool EnableAccess = true; + getAdapter()->call( + MKernel, UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), + nullptr, &EnableAccess); +} + #ifndef __INTEL_PREVIEW_BREAKING_CHANGES template <> typename info::platform::version::return_type diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 07512438f4b8c..cbef05f34fb27 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -207,6 +207,9 @@ class kernel_impl { /// \return true if kernel was created from source. bool isCreatedFromSource() const; + bool isInteropOrSourceBased() const noexcept; + bool hasSYCLMetadata() const noexcept; + const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; } ur_native_handle_t getNative() const { @@ -260,6 +263,8 @@ class kernel_impl { size_t queryMaxNumWorkGroups(queue Queue, const range &WorkGroupSize, size_t DynamicLocalMemorySize) const; + + void enableUSMIndirectAccess() const; }; template diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 83d43b899bf37..a3331019a97b2 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1893,245 +1893,256 @@ static bool shouldSkipEmptyImage(sycl_device_binary RawImg) { return true; } -void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { +void ProgramManager::addImage(sycl_device_binary RawImg, + RTDeviceBinaryImage **OutImage, + std::vector *OutKernelIDs) { const bool DumpImages = std::getenv("SYCL_DUMP_IMAGES") && !m_UseSpvFile; - for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) { - sycl_device_binary RawImg = &(DeviceBinary->DeviceBinaries[I]); - const sycl_offload_entry EntriesB = RawImg->EntriesBegin; - const sycl_offload_entry EntriesE = RawImg->EntriesEnd; - if ((EntriesB == EntriesE) && shouldSkipEmptyImage(RawImg)) - continue; + const sycl_offload_entry EntriesB = RawImg->EntriesBegin; + const sycl_offload_entry EntriesE = RawImg->EntriesEnd; + // Treat the image as empty one + if (EntriesB == EntriesE && shouldSkipEmptyImage(RawImg)) + return; - std::unique_ptr Img; - bool IsBfloat16DeviceLib = false; - uint32_t Bfloat16DeviceLibVersion = 0; - if (isDeviceImageCompressed(RawImg)) + std::unique_ptr Img; + bool IsBfloat16DeviceLib = false; + uint32_t Bfloat16DeviceLibVersion = 0; + if (isDeviceImageCompressed(RawImg)) #ifndef SYCL_RT_ZSTD_NOT_AVAIABLE - Img = std::make_unique(RawImg); + Img = std::make_unique(RawImg); #else - throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), - "Recieved a compressed device image, but " - "SYCL RT was built without ZSTD support." - "Aborting. "); + throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), + "Recieved a compressed device image, but " + "SYCL RT was built without ZSTD support." + "Aborting. "); #endif - else { - IsBfloat16DeviceLib = - isBfloat16DeviceLibImage(RawImg, &Bfloat16DeviceLibVersion); - if (!IsBfloat16DeviceLib) - Img = std::make_unique(RawImg); - } - - static uint32_t SequenceID = 0; - - // Fill the kernel argument mask map, no need to do this for bfloat16 - // device library image since it doesn't include any kernel. - if (!IsBfloat16DeviceLib) { - const RTDeviceBinaryImage::PropertyRange &KPOIRange = - Img->getKernelParamOptInfo(); - if (KPOIRange.isAvailable()) { - KernelNameToArgMaskMap &ArgMaskMap = - m_EliminatedKernelArgMasks[Img.get()]; - for (const auto &Info : KPOIRange) - ArgMaskMap[Info->Name] = - createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); - } + else { + IsBfloat16DeviceLib = + isBfloat16DeviceLibImage(RawImg, &Bfloat16DeviceLibVersion); + if (!IsBfloat16DeviceLib) + Img = std::make_unique(RawImg); + } + + // If an output image is requested, set it to the newly allocated image. + if (OutImage) + *OutImage = Img.get(); + + static uint32_t SequenceID = 0; + + // Fill the kernel argument mask map, no need to do this for bfloat16 + // device library image since it doesn't include any kernel. + if (!IsBfloat16DeviceLib) { + const RTDeviceBinaryImage::PropertyRange &KPOIRange = + Img->getKernelParamOptInfo(); + if (KPOIRange.isAvailable()) { + KernelNameToArgMaskMap &ArgMaskMap = + m_EliminatedKernelArgMasks[Img.get()]; + for (const auto &Info : KPOIRange) + ArgMaskMap[Info->Name] = + createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); } + } - // Fill maps for kernel bundles - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // Fill maps for kernel bundles + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - // For bfloat16 device library image, it doesn't include any kernel, device - // global, virtual function, so just skip adding it to any related maps. - // The bfloat16 device library are provided by compiler and may be used by - // different sycl device images, program manager will own single copy for - // native and fallback version bfloat16 device library, these device - // library images will not be erased unless program manager is destroyed. - { - if (IsBfloat16DeviceLib) { - assert((Bfloat16DeviceLibVersion < 2) && - "Invalid Bfloat16 Device Library Index."); - if (m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion].get()) - continue; - size_t ImgSize = - static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); - std::unique_ptr Data(new char[ImgSize]); - std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); - auto DynBfloat16DeviceLibImg = - std::make_unique(std::move(Data), ImgSize); - auto ESPropSet = getExportedSymbolPS(RawImg); - sycl_device_binary_property ESProp; - for (ESProp = ESPropSet->PropertiesBegin; - ESProp != ESPropSet->PropertiesEnd; ++ESProp) { - m_ExportedSymbolImages.insert( - {ESProp->Name, DynBfloat16DeviceLibImg.get()}); - } - m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion] = - std::move(DynBfloat16DeviceLibImg); - continue; + // For bfloat16 device library image, it doesn't include any kernel, device + // global, virtual function, so just skip adding it to any related maps. + // The bfloat16 device library are provided by compiler and may be used by + // different sycl device images, program manager will own single copy for + // native and fallback version bfloat16 device library, these device + // library images will not be erased unless program manager is destroyed. + { + if (IsBfloat16DeviceLib) { + assert((Bfloat16DeviceLibVersion < 2) && + "Invalid Bfloat16 Device Library Index."); + if (m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion].get()) + return; + size_t ImgSize = + static_cast(RawImg->BinaryEnd - RawImg->BinaryStart); + std::unique_ptr Data(new char[ImgSize]); + std::memcpy(Data.get(), RawImg->BinaryStart, ImgSize); + auto DynBfloat16DeviceLibImg = + std::make_unique(std::move(Data), ImgSize); + auto ESPropSet = getExportedSymbolPS(RawImg); + sycl_device_binary_property ESProp; + for (ESProp = ESPropSet->PropertiesBegin; + ESProp != ESPropSet->PropertiesEnd; ++ESProp) { + m_ExportedSymbolImages.insert( + {ESProp->Name, DynBfloat16DeviceLibImg.get()}); } + m_Bfloat16DeviceLibImages[Bfloat16DeviceLibVersion] = + std::move(DynBfloat16DeviceLibImg); + return; } + } - // Register all exported symbols - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); - } + // Register all exported symbols + for (const sycl_device_binary_property &ESProp : Img->getExportedSymbols()) { + m_ExportedSymbolImages.insert({ESProp->Name, Img.get()}); + } - // Record mapping between virtual function sets and device images - for (const sycl_device_binary_property &VFProp : - Img->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); - for (const auto &SetName : detail::split_string(StrValue, ',')) - m_VFSet2BinImage[SetName].insert(Img.get()); - } + // Record mapping between virtual function sets and device images + for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { + std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + for (const auto &SetName : detail::split_string(StrValue, ',')) + m_VFSet2BinImage[SetName].insert(Img.get()); + } - if (DumpImages) { - const bool NeedsSequenceID = std::any_of( - m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(), - [&](auto &CurrentImg) { - return CurrentImg.first->getFormat() == Img->getFormat(); - }); + if (DumpImages) { + const bool NeedsSequenceID = + std::any_of(m_BinImg2KernelIDs.begin(), m_BinImg2KernelIDs.end(), + [&](auto &CurrentImg) { + return CurrentImg.first->getFormat() == Img->getFormat(); + }); - // Check if image is compressed, and decompress it before dumping. - CheckAndDecompressImage(Img.get()); + // Check if image is compressed, and decompress it before dumping. + CheckAndDecompressImage(Img.get()); - dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); - } + dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); + } - m_BinImg2KernelIDs[Img.get()].reset(new std::vector); + std::shared_ptr> &KernelIDs = + m_BinImg2KernelIDs[Img.get()]; + KernelIDs.reset(new std::vector); - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { - auto name = EntriesIt->GetName(); + auto name = EntriesIt->GetName(); - // Skip creating unique kernel ID if it is a service kernel. - // SYCL service kernels are identified by having - // __sycl_service_kernel__ in the mangled name, primarily as part of - // the namespace of the name type. - if (std::strstr(name, "__sycl_service_kernel__")) { - m_ServiceKernels.insert(std::make_pair(name, Img.get())); - continue; - } + // Skip creating unique kernel ID if it is a service kernel. + // SYCL service kernels are identified by having + // __sycl_service_kernel__ in the mangled name, primarily as part of + // the namespace of the name type. + if (std::strstr(name, "__sycl_service_kernel__")) { + m_ServiceKernels.insert(std::make_pair(name, Img.get())); + continue; + } - // Skip creating unique kernel ID if it is an exported device - // function. Exported device functions appear in the offload entries - // among kernels, but are identifiable by being listed in properties. - if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) - continue; + // Skip creating unique kernel ID if it is an exported device + // function. Exported device functions appear in the offload entries + // among kernels, but are identifiable by being listed in properties. + if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) + continue; - // ... and create a unique kernel ID for the entry - auto It = m_KernelName2KernelIDs.find(name); - if (It == m_KernelName2KernelIDs.end()) { - std::shared_ptr KernelIDImpl = - std::make_shared(name); - sycl::kernel_id KernelID = - detail::createSyclObjFromImpl( - std::move(KernelIDImpl)); + // ... and create a unique kernel ID for the entry + auto It = m_KernelName2KernelIDs.find(name); + if (It == m_KernelName2KernelIDs.end()) { + std::shared_ptr KernelIDImpl = + std::make_shared(name); + sycl::kernel_id KernelID = + detail::createSyclObjFromImpl(KernelIDImpl); - It = m_KernelName2KernelIDs.emplace_hint(It, name, std::move(KernelID)); - } - m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); - m_BinImg2KernelIDs[Img.get()]->push_back(It->second); + It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID); } + m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); + KernelIDs->push_back(It->second); + } - cacheKernelUsesAssertInfo(*Img); + cacheKernelUsesAssertInfo(*Img); - // check if kernel uses sanitizer - { - sycl_device_binary_property SanProp = Img->getProperty("sanUsed"); - if (SanProp) { - std::string SanValue = - detail::DeviceBinaryProperty(SanProp).asCString(); - - if (SanValue.rfind("asan", 0) == 0) { // starts_with - m_SanitizerFoundInImage = SanitizerType::AddressSanitizer; - } else if (SanValue.rfind("msan", 0) == 0) { - m_SanitizerFoundInImage = SanitizerType::MemorySanitizer; - } else if (SanValue.rfind("tsan", 0) == 0) { - m_SanitizerFoundInImage = SanitizerType::ThreadSanitizer; - } + // check if kernel uses sanitizer + { + sycl_device_binary_property SanProp = Img->getProperty("sanUsed"); + if (SanProp) { + std::string SanValue = detail::DeviceBinaryProperty(SanProp).asCString(); + + if (SanValue.rfind("asan", 0) == 0) { // starts_with + m_SanitizerFoundInImage = SanitizerType::AddressSanitizer; + } else if (SanValue.rfind("msan", 0) == 0) { + m_SanitizerFoundInImage = SanitizerType::MemorySanitizer; + } else if (SanValue.rfind("tsan", 0) == 0) { + m_SanitizerFoundInImage = SanitizerType::ThreadSanitizer; } } + } - cacheKernelImplicitLocalArg(*Img); + cacheKernelImplicitLocalArg(*Img); - // Sort kernel ids for faster search - std::sort(m_BinImg2KernelIDs[Img.get()]->begin(), - m_BinImg2KernelIDs[Img.get()]->end(), LessByHash{}); + // Sort kernel ids for faster search + std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); - // ... and initialize associated device_global information - { - std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + // If requested, copy the new (sorted) kernel IDs. + if (OutKernelIDs) + OutKernelIDs->insert(OutKernelIDs->end(), KernelIDs->begin(), + KernelIDs->end()); - auto DeviceGlobals = Img->getDeviceGlobals(); - for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { - ByteArray DeviceGlobalInfo = - DeviceBinaryProperty(DeviceGlobal).asByteArray(); - - // The supplied device_global info property is expected to contain: - // * 8 bytes - Size of the property. - // * 4 bytes - Size of the underlying type in the device_global. - // * 4 bytes - 0 if device_global has device_image_scope and any value - // otherwise. - DeviceGlobalInfo.dropBytes(8); - auto [TypeSize, DeviceImageScopeDecorated] = - DeviceGlobalInfo.consume(); - assert(DeviceGlobalInfo.empty() && "Extra data left!"); - - // Give the image pointer as an identifier for the image the - // device-global is associated with. - - auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name); - if (ExistingDeviceGlobal != m_DeviceGlobals.end()) { - // If it has already been registered we update the information. - ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize, - DeviceImageScopeDecorated); - } else { - // If it has not already been registered we create a new entry. - // Note: Pointer to the device global is not available here, so it - // cannot be set until registration happens. - auto EntryUPtr = std::make_unique( - DeviceGlobal->Name, Img.get(), TypeSize, - DeviceImageScopeDecorated); - m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); - } + // ... and initialize associated device_global information + { + std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + + auto DeviceGlobals = Img->getDeviceGlobals(); + for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { + ByteArray DeviceGlobalInfo = + DeviceBinaryProperty(DeviceGlobal).asByteArray(); + + // The supplied device_global info property is expected to contain: + // * 8 bytes - Size of the property. + // * 4 bytes - Size of the underlying type in the device_global. + // * 4 bytes - 0 if device_global has device_image_scope and any value + // otherwise. + DeviceGlobalInfo.dropBytes(8); + auto [TypeSize, DeviceImageScopeDecorated] = + DeviceGlobalInfo.consume(); + assert(DeviceGlobalInfo.empty() && "Extra data left!"); + + // Give the image pointer as an identifier for the image the + // device-global is associated with. + + auto ExistingDeviceGlobal = m_DeviceGlobals.find(DeviceGlobal->Name); + if (ExistingDeviceGlobal != m_DeviceGlobals.end()) { + // If it has already been registered we update the information. + ExistingDeviceGlobal->second->initialize(Img.get(), TypeSize, + DeviceImageScopeDecorated); + } else { + // If it has not already been registered we create a new entry. + // Note: Pointer to the device global is not available here, so it + // cannot be set until registration happens. + auto EntryUPtr = std::make_unique( + DeviceGlobal->Name, Img.get(), TypeSize, DeviceImageScopeDecorated); + m_DeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); } } - // ... and initialize associated host_pipe information - { - std::lock_guard HostPipesGuard(m_HostPipesMutex); - auto HostPipes = Img->getHostPipes(); - for (const sycl_device_binary_property &HostPipe : HostPipes) { - ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray(); - - // The supplied host_pipe info property is expected to contain: - // * 8 bytes - Size of the property. - // * 4 bytes - Size of the underlying type in the host_pipe. - // Note: Property may be padded. - - HostPipeInfo.dropBytes(8); - auto TypeSize = HostPipeInfo.consume(); - assert(HostPipeInfo.empty() && "Extra data left!"); - - auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name); - if (ExistingHostPipe != m_HostPipes.end()) { - // If it has already been registered we update the information. - ExistingHostPipe->second->initialize(TypeSize); - ExistingHostPipe->second->initialize(Img.get()); - } else { - // If it has not already been registered we create a new entry. - // Note: Pointer to the host pipe is not available here, so it - // cannot be set until registration happens. - auto EntryUPtr = - std::make_unique(HostPipe->Name, TypeSize); - EntryUPtr->initialize(Img.get()); - m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr)); - } + } + // ... and initialize associated host_pipe information + { + std::lock_guard HostPipesGuard(m_HostPipesMutex); + auto HostPipes = Img->getHostPipes(); + for (const sycl_device_binary_property &HostPipe : HostPipes) { + ByteArray HostPipeInfo = DeviceBinaryProperty(HostPipe).asByteArray(); + + // The supplied host_pipe info property is expected to contain: + // * 8 bytes - Size of the property. + // * 4 bytes - Size of the underlying type in the host_pipe. + // Note: Property may be padded. + + HostPipeInfo.dropBytes(8); + auto TypeSize = HostPipeInfo.consume(); + assert(HostPipeInfo.empty() && "Extra data left!"); + + auto ExistingHostPipe = m_HostPipes.find(HostPipe->Name); + if (ExistingHostPipe != m_HostPipes.end()) { + // If it has already been registered we update the information. + ExistingHostPipe->second->initialize(TypeSize); + ExistingHostPipe->second->initialize(Img.get()); + } else { + // If it has not already been registered we create a new entry. + // Note: Pointer to the host pipe is not available here, so it + // cannot be set until registration happens. + auto EntryUPtr = + std::make_unique(HostPipe->Name, TypeSize); + EntryUPtr->initialize(Img.get()); + m_HostPipes.emplace(HostPipe->Name, std::move(EntryUPtr)); } } - m_DeviceImages.insert({RawImg, std::move(Img)}); } + + m_DeviceImages.insert({RawImg, std::move(Img)}); +} + +void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { + for (int I = 0; I < DeviceBinary->NumDeviceBinaries; I++) + addImage(&(DeviceBinary->DeviceBinaries[I])); } void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { @@ -2353,17 +2364,24 @@ static bundle_state getBinImageState(const RTDeviceBinaryImage *BinImage) { : sycl::bundle_state::object; } -kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { +std::optional +ProgramManager::tryGetSYCLKernelID(const std::string &KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); auto KernelID = m_KernelName2KernelIDs.find(KernelName); if (KernelID == m_KernelName2KernelIDs.end()) - throw exception(make_error_code(errc::runtime), - "No kernel found with the specified name"); + return std::nullopt; return KernelID->second; } +kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { + if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) + return *MaybeKernelID; + throw exception(make_error_code(errc::runtime), + "No kernel found with the specified name"); +} + bool ProgramManager::hasCompatibleImage(const device &Dev) { std::lock_guard Guard(m_KernelIDsMutex); @@ -2437,6 +2455,20 @@ ProgramManager::getDeviceGlobalEntry(const void *DeviceGlobalPtr) { return Entry->second; } +DeviceGlobalMapEntry * +ProgramManager::tryGetDeviceGlobalEntry(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated) { + std::lock_guard DeviceGlobalsGuard(m_DeviceGlobalsMutex); + auto DeviceGlobalEntry = m_DeviceGlobals.find(UniqueId); + assert(DeviceGlobalEntry != m_DeviceGlobals.end() && + "Device global not found in map."); + if (DeviceGlobalEntry != m_DeviceGlobals.end() && + (!ExcludeDeviceImageScopeDecorated || + !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)) + return DeviceGlobalEntry->second.get(); + return nullptr; +} + std::vector ProgramManager::getDeviceGlobalEntries( const std::vector &UniqueIds, bool ExcludeDeviceImageScopeDecorated) { @@ -2666,93 +2698,100 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( Images.reserve(Deps.size() + 1); Images.push_back( createSyclObjFromImpl(std::move(MainImpl))); - for (RTDeviceBinaryImage *Dep : Deps) { - std::shared_ptr> DepKernelIDs; - { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - // For device library images, they are not in m_BinImg2KernelIDs since - // no kernel is included. - auto DepIt = m_BinImg2KernelIDs.find(Dep); - if (DepIt != m_BinImg2KernelIDs.end()) - DepKernelIDs = DepIt->second; - } - - assert(ImgInfoPair.second.State == getBinImageState(Dep) && - "State mismatch between main image and its dependency"); - DeviceImageImplPtr DepImpl = std::make_shared( - Dep, Ctx, Devs, ImgInfoPair.second.State, DepKernelIDs, - /*PIProgram=*/nullptr); - + for (RTDeviceBinaryImage *Dep : Deps) Images.push_back( - createSyclObjFromImpl(std::move(DepImpl))); - } + createDependencyImage(Ctx, Devs, Dep, ImgInfoPair.second.State)); SYCLDeviceImages.push_back(std::move(Images)); } return SYCLDeviceImages; } -void ProgramManager::bringSYCLDeviceImagesToState( - std::vector &DeviceImages, bundle_state TargetState) { - - for (DevImgPlainWithDeps &ImgWithDeps : DeviceImages) { - device_image_plain &MainImg = ImgWithDeps.getMain(); - const DeviceImageImplPtr &MainImgImpl = getSyclObjImpl(MainImg); - const bundle_state DevImageState = MainImgImpl->get_state(); - // At this time, there is no circumstance where a device image should ever - // be in the source state. That not good. - assert(DevImageState != bundle_state::ext_oneapi_source); - - switch (TargetState) { +device_image_plain ProgramManager::createDependencyImage( + const context &Ctx, const std::vector &Devs, + RTDeviceBinaryImage *DepImage, bundle_state DepState) { + std::shared_ptr> DepKernelIDs; + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // For device library images, they are not in m_BinImg2KernelIDs since + // no kernel is included. + auto DepIt = m_BinImg2KernelIDs.find(DepImage); + if (DepIt != m_BinImg2KernelIDs.end()) + DepKernelIDs = DepIt->second; + } + + assert(DepState == getBinImageState(DepImage) && + "State mismatch between main image and its dependency"); + DeviceImageImplPtr DepImpl = std::make_shared( + DepImage, Ctx, Devs, DepState, DepKernelIDs, /*PIProgram=*/nullptr); + + return createSyclObjFromImpl(std::move(DepImpl)); +} + +void ProgramManager::bringSYCLDeviceImageToState( + DevImgPlainWithDeps &DeviceImage, bundle_state TargetState) { + device_image_plain &MainImg = DeviceImage.getMain(); + const DeviceImageImplPtr &MainImgImpl = getSyclObjImpl(MainImg); + const bundle_state DevImageState = getSyclObjImpl(MainImg)->get_state(); + // At this time, there is no circumstance where a device image should ever + // be in the source state. That not good. + assert(DevImageState != bundle_state::ext_oneapi_source); + + switch (TargetState) { + case bundle_state::ext_oneapi_source: + // This case added for switch statement completion. We should not be here. + assert(DevImageState == bundle_state::ext_oneapi_source); + break; + case bundle_state::input: + // Do nothing since there is no state which can be upgraded to the input. + assert(DevImageState == bundle_state::input); + break; + case bundle_state::object: + if (DevImageState == bundle_state::input) { + DeviceImage = compile(DeviceImage, MainImgImpl->get_devices(), + /*PropList=*/{}); + break; + } + // Device image is expected to be object state then. + assert(DevImageState == bundle_state::object); + break; + case bundle_state::executable: { + switch (DevImageState) { case bundle_state::ext_oneapi_source: - // This case added for switch statement completion. We should not be here. - assert(DevImageState == bundle_state::ext_oneapi_source); + // This case added for switch statement completion. + // We should not be here. + assert(DevImageState != bundle_state::ext_oneapi_source); break; case bundle_state::input: - // Do nothing since there is no state which can be upgraded to the input. - assert(DevImageState == bundle_state::input); - break; - case bundle_state::object: - if (DevImageState == bundle_state::input) { - ImgWithDeps = compile(ImgWithDeps, MainImgImpl->get_devices(), - /*PropList=*/{}); - break; - } - // Device image is expected to be object state then. - assert(DevImageState == bundle_state::object); + DeviceImage = build(DeviceImage, MainImgImpl->get_devices(), + /*PropList=*/{}); break; - case bundle_state::executable: { - switch (DevImageState) { - case bundle_state::ext_oneapi_source: - // This case added for switch statement completion. - // We should not be here. - assert(DevImageState != bundle_state::ext_oneapi_source); - break; - case bundle_state::input: - ImgWithDeps = build(ImgWithDeps, MainImgImpl->get_devices(), - /*PropList=*/{}); - break; - case bundle_state::object: { - std::vector LinkedDevImages = - link(ImgWithDeps, getSyclObjImpl(MainImg)->get_devices(), - /*PropList=*/{}); - // Since only one device image is passed here one output device image is - // expected - assert(LinkedDevImages.size() == 1 && "Expected one linked image here"); - ImgWithDeps = LinkedDevImages[0]; - break; - } - case bundle_state::executable: - ImgWithDeps = build(ImgWithDeps, MainImgImpl->get_devices(), - /*PropList=*/{}); - break; - } + case bundle_state::object: { + std::vector LinkedDevImages = + link(DeviceImage, MainImgImpl->get_devices(), + /*PropList=*/{}); + // Since only one device image is passed here one output device image is + // expected + assert(LinkedDevImages.size() == 1 && "Expected one linked image here"); + DeviceImage = LinkedDevImages[0]; break; } + case bundle_state::executable: + DeviceImage = build(DeviceImage, MainImgImpl->get_devices(), + /*PropList=*/{}); + break; } + break; + } } } +void ProgramManager::bringSYCLDeviceImagesToState( + std::vector &DeviceImages, bundle_state TargetState) { + for (DevImgPlainWithDeps &ImgWithDeps : DeviceImages) + bringSYCLDeviceImageToState(ImgWithDeps, TargetState); +} + std::vector ProgramManager::getSYCLDeviceImages(const context &Ctx, const std::vector &Devs, @@ -2851,11 +2890,14 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Adapter); + std::optional RTCInfo = + InputImpl->getRTCInfo(); DeviceImageImplPtr ObjectImpl = std::make_shared( InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), - InputImpl->get_spec_const_blob_ref()); + InputImpl->get_spec_const_blob_ref(), InputImpl->getOriginMask(), + std::move(RTCInfo)); std::string CompileOptions; applyCompileOptionsFromEnvironment(CompileOptions); @@ -3008,11 +3050,26 @@ ProgramManager::link(const DevImgPlainWithDeps &ImgWithDeps, } } + // The origin becomes the combination of all the origins. + uint8_t CombinedOrigins = 0; + // For the kernel compiler binary info, we collect pointers to all of the + // input ones and then merge them afterwards. + std::vector *> + RTCInfoPtrs; + RTCInfoPtrs.reserve(ImgWithDeps.size()); + for (const device_image_plain &DevImg : ImgWithDeps) { + const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); + CombinedOrigins |= DevImgImpl->getOriginMask(); + RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); + } + auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); + auto BinImg = InputImpl->get_bin_image_ref(); DeviceImageImplPtr ExecutableImpl = std::make_shared( BinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), - LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob)); + LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), + CombinedOrigins, std::move(MergedRTCInfo)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3064,13 +3121,26 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, SpecConstMap = MainInputImpl->get_spec_const_data_ref(); } + // The origin becomes the combination of all the origins. + uint8_t CombinedOrigins = 0; + for (const device_image_plain &DevImg : DevImgWithDeps) + CombinedOrigins |= getSyclObjImpl(DevImg)->getOriginMask(); + + std::vector *> + RTCInfoPtrs; + RTCInfoPtrs.reserve(DevImgWithDeps.size()); + for (const device_image_plain &DevImg : DevImgWithDeps) + RTCInfoPtrs.emplace_back(&(getSyclObjImpl(DevImg)->getRTCInfo())); + auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); + ur_program_handle_t ResProgram = getBuiltURProgram( std::move(BinImgs), Context, Devs, &DevImgWithDeps, SpecConstBlob); DeviceImageImplPtr ExecImpl = std::make_shared( MainInputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, std::move(KernelIDs), ResProgram, - std::move(SpecConstMap), std::move(SpecConstBlob)); + std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, + std::move(MergedRTCInfo)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index ae8d2fc217ccb..f683a1c675935 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -216,6 +216,9 @@ class ProgramManager { ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, const ContextImplPtr &Context); + void addImage(sycl_device_binary RawImg, + RTDeviceBinaryImage **OutImage = nullptr, + std::vector *OutKernelIDs = nullptr); void addImages(sycl_device_binaries DeviceImages); void removeImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; @@ -233,7 +236,11 @@ class ProgramManager { const std::string &KernelName); // The function returns the unique SYCL kernel identifier associated with a - // kernel name. + // kernel name or nullopt if there is no such ID. + std::optional tryGetSYCLKernelID(const std::string &KernelName); + + // The function returns the unique SYCL kernel identifier associated with a + // kernel name or throws a sycl exception if there is no such ID. kernel_id getSYCLKernelID(const std::string &KernelName); // The function returns a vector containing all unique SYCL kernel identifiers @@ -256,6 +263,13 @@ class ProgramManager { // device_global object from the device_global map. DeviceGlobalMapEntry *getDeviceGlobalEntry(const void *DeviceGlobalPtr); + // The function attempts to get a single device_global entry identified by its + // unique ID from the device_global map. If no such entry is found, nullptr is + // returned. + DeviceGlobalMapEntry * + tryGetDeviceGlobalEntry(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated = false); + // The function gets multiple device_global entries identified by their unique // IDs from the device_global map. std::vector @@ -283,7 +297,17 @@ class ProgramManager { const context &Ctx, const std::vector &Devs, bundle_state TargetState, const std::vector &KernelIDs = {}); - // Brind images in the passed vector to the required state. Does it inplace + // Creates a new dependency image for a given dependency binary image. + device_image_plain createDependencyImage(const context &Ctx, + const std::vector &Devs, + RTDeviceBinaryImage *DepImage, + bundle_state DepState); + + // Bring image to the required state. Does it inplace + void bringSYCLDeviceImageToState(DevImgPlainWithDeps &DeviceImage, + bundle_state TargetState); + + // Bring images in the passed vector to the required state. Does it inplace void bringSYCLDeviceImagesToState(std::vector &DeviceImages, bundle_state TargetState); @@ -345,6 +369,12 @@ class ProgramManager { std::set getRawDeviceImages(const std::vector &KernelIDs); + std::set + collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); + std::set + collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, + const device &Dev); + private: ProgramManager(ProgramManager const &) = delete; ProgramManager &operator=(ProgramManager const &) = delete; @@ -368,11 +398,6 @@ class ProgramManager { /// Add info on kernels using local arg into cache void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img); - std::set - collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device &Dev); - std::set - collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, - const device &Dev); std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device &Dev); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index da434e1fe50f5..2c5d13a1e3831 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -875,9 +875,10 @@ class queue_impl { if (IsKernel) // Kernel only uses assert if it's non interop one - KernelUsesAssert = !(Handler.MKernel && Handler.MKernel->isInterop()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.c_str()); + KernelUsesAssert = + (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && + ProgramManager::getInstance().kernelUsesAssert( + Handler.MKernelName.c_str()); auto Event = MIsInorder ? finalizeHandlerInOrder(Handler) : finalizeHandlerOutOfOrder(Handler); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 1a59c22e2c280..b9ffdcc73ade3 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1995,24 +1995,12 @@ void instrumentationAddExtraKernelMetadata( std::mutex *KernelMutex = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; - std::shared_ptr SyclKernelImpl; - std::shared_ptr DeviceImageImpl; - - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the - // kernels in interop kernel bundles (if any) do not have kernel_id and - // can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - std::shared_ptr KernelImpl = - detail::getSyclObjImpl(SyclKernel); - - EliminatedArgMask = KernelImpl->getKernelArgMask(); - Program = KernelImpl->getDeviceImage()->get_ur_program_ref(); + if (auto SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + KernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { + EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); + Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else if (nullptr != SyclKernel) { Program = SyclKernel->getProgramRef(); if (!SyclKernel->isCreatedFromSource()) @@ -2530,21 +2518,13 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, ur_kernel_handle_t UrKernel = nullptr; std::shared_ptr DeviceImageImpl = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; + auto &KernelBundleImplPtr = CommandGroup.MKernelBundle; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (auto KernelBundleImplPtr = CommandGroup.MKernelBundle; - KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( - CommandGroup.MKernelName); - - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - - auto SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); + if (auto SyclKernelImpl = + KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel(CommandGroup.MKernelName, + KernelBundleImplPtr) + : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); @@ -2697,19 +2677,10 @@ void enqueueImpKernel( std::shared_ptr SyclKernelImpl; std::shared_ptr DeviceImageImpl; - // Use kernel_bundle if available unless it is interop. - // Interop bundles can't be used in the first branch, because the kernels - // in interop kernel bundles (if any) do not have kernel_id - // and can therefore not be looked up, but since they are self-contained - // they can simply be launched directly. - if (KernelBundleImplPtr && !KernelBundleImplPtr->isInterop()) { - kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID(KernelName); - kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - - SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); - + if ((SyclKernelImpl = KernelBundleImplPtr + ? KernelBundleImplPtr->tryGetKernel( + KernelName, KernelBundleImplPtr) + : std::shared_ptr{nullptr})) { Kernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = SyclKernelImpl->getDeviceImage(); @@ -3271,7 +3242,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one bool KernelUsesAssert = - !(SyclKernel && SyclKernel->isInterop()) && + (!SyclKernel || SyclKernel->hasSYCLMetadata()) && ProgramManager::getInstance().kernelUsesAssert(KernelName); if (KernelUsesAssert) { EventImpl = MEvent; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ffb28d2ab6ff9..effb7190c5ae9 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -429,8 +429,12 @@ event handler::finalize() { getOrInsertHandlerKernelBundle(/*Insert=*/false); if (KernelBundleImpPtr) { // Make sure implicit non-interop kernel bundles have the kernel - if (!KernelBundleImpPtr->isInterop() && - !impl->isStateExplicitKernelBundle()) { + if (!impl->isStateExplicitKernelBundle() && + !(MKernel && MKernel->isInterop()) && + (KernelBundleImpPtr->empty() || + KernelBundleImpPtr->hasSYCLOfflineImages()) && + !KernelBundleImpPtr->tryGetKernel(MKernelName.c_str(), + KernelBundleImpPtr)) { auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = diff --git a/sycl/test-e2e/KernelCompiler/sycl_join.cpp b/sycl/test-e2e/KernelCompiler/sycl_join.cpp new file mode 100644 index 0000000000000..2b4dc8f8e1bec --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/sycl_join.cpp @@ -0,0 +1,284 @@ +//==----------- sycl_join.cpp --- kernel_compiler extension tests ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_shared_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/17255 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include +#include +#include + +auto constexpr SYCLSource1 = R"""( +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel1(int *Ptr) { + *Ptr = 42; +} +)"""; + +auto constexpr SYCLSource2 = R"""( +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (sycl::ext::oneapi::experimental::single_task_kernel)) +void TestKernel2(int *Ptr) { + *Ptr = 24; +} +)"""; + +namespace syclex = sycl::ext::oneapi::experimental; +using source_kb = sycl::kernel_bundle; +using exe_kb = sycl::kernel_bundle; + +class RegularSYCLKernel; + +constexpr int RegularSYCLKernelWriteValue = 4224; + +void RunRegularSYCLKernel(sycl::queue Q, const exe_kb &Bundle, int *Ptr) { + Q.submit([&](sycl::handler &CGH) { + CGH.use_kernel_bundle(Bundle); + CGH.single_task( + [=]() { *Ptr = RegularSYCLKernelWriteValue; }); + }).wait_and_throw(); +} + +int main() { + + sycl::queue Q; + sycl::context Ctx = Q.get_context(); + + if (!Q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl)) { + std::cout << "Apparently this device does not support `sycl` source " + "kernel bundle extension: " + << Q.get_device().get_info() + << std::endl; + return -1; + } + + int Failed = 0; + + source_kb KBSrc1 = syclex::create_kernel_bundle_from_source( + Ctx, syclex::source_language::sycl, SYCLSource1); + source_kb KBSrc2 = syclex::create_kernel_bundle_from_source( + Ctx, syclex::source_language::sycl, SYCLSource2); + + // Test joining of source kernel bundles. + { + std::vector KBSrcs{KBSrc1, KBSrc2}; + source_kb KBSrcJoined = sycl::join(KBSrcs); + + exe_kb KBExeJoined = syclex::build(KBSrcJoined); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined source bundles failed: " << *IntPtr + << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined source bundles failed: " << *IntPtr + << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + auto KBExe1 = std::make_shared(syclex::build(KBSrc1)); + auto KBExe2 = std::make_shared(syclex::build(KBSrc2)); + + // Test joining of source-based executable kernel bundles. + { + std::vector KBExes{*KBExe1, *KBExe2}; + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout + << "TestKernel1 in joined source-based executable bundles failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout + << "TestKernel1 in joined source-based executable bundles failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + // Test joining of executable kernel bundles. + { + sycl::kernel_id RegularSYCLKernelID = + sycl::get_kernel_id(); + std::vector RegularSYCLKernelIDs{RegularSYCLKernelID}; + exe_kb RegularKBExe = + sycl::get_kernel_bundle( + Ctx, RegularSYCLKernelIDs); + std::vector KBExes{*KBExe1, *KBExe2, RegularKBExe}; + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + assert(KBExeJoined.has_kernel(RegularSYCLKernelID, Q.get_device())); + assert(KBExeJoined.has_kernel()); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + sycl::kernel RegularSYCLK = KBExeJoined.get_kernel(RegularSYCLKernelID); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined mixed executable bundles failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined mixed executable bundles failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(RegularSYCLK); + }).wait_and_throw(); + + if (*IntPtr != RegularSYCLKernelWriteValue) { + std::cout << "Regular SYCL kernel (explicit) in joined mixed executable " + "bundles failed: " + << *IntPtr << " != " << RegularSYCLKernelWriteValue << "\n"; + ++Failed; + } + *IntPtr = 0; + + RunRegularSYCLKernel(Q, KBExeJoined, IntPtr); + + if (*IntPtr != RegularSYCLKernelWriteValue) { + std::cout << "Regular SYCL kernel (implicit) in joined mixed executable " + "bundles failed: " + << *IntPtr << " != " << RegularSYCLKernelWriteValue << "\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + // Test joining of executable kernel bundles with the original bundles dying + // before the parent. + { + std::vector KBExes{*KBExe1, *KBExe2}; + + KBExe1.reset(); + KBExe2.reset(); + + exe_kb KBExeJoined = sycl::join(KBExes); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel1")); + assert(KBExeJoined.ext_oneapi_has_kernel("TestKernel2")); + + sycl::kernel K1 = KBExeJoined.ext_oneapi_get_kernel("TestKernel1"); + sycl::kernel K2 = KBExeJoined.ext_oneapi_get_kernel("TestKernel2"); + + int *IntPtr = sycl::malloc_shared(1, Q); + *IntPtr = 0; + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K1); + }).wait_and_throw(); + + if (*IntPtr != 42) { + std::cout << "TestKernel1 in joined source-based executable bundles with " + "dead parents failed: " + << *IntPtr << " != 42\n"; + ++Failed; + } + + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(IntPtr); + CGH.single_task(K2); + }).wait_and_throw(); + + if (*IntPtr != 24) { + std::cout << "TestKernel1 in joined source-based executable bundles with " + "dead parents failed: " + << *IntPtr << " != 24\n"; + ++Failed; + } + + sycl::free(IntPtr, Q); + } + + return Failed; +} diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index a27efa2dcfc8f..1f8ce4897a5f2 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 13 +// CHECK-NUM-MATCHES: 14 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 80c7ddf07c1b2..a290a68542bdb 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -163,11 +163,9 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( EXPECT_TRUE(KernelBundleImplPtr) << "Expect command group to contain kernel bundle"; - auto KernelID = sycl::detail::ProgramManager::getInstance().getSYCLKernelID( - ExecKernel->MKernelName); - sycl::kernel SyclKernel = - KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); - auto SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); + auto SyclKernelImpl = KernelBundleImplPtr->tryGetKernel( + ExecKernel->MKernelName, KernelBundleImplPtr); + EXPECT_TRUE(SyclKernelImpl != nullptr); std::shared_ptr DeviceImageImpl = SyclKernelImpl->getDeviceImage(); ur_program_handle_t Program = DeviceImageImpl->get_ur_program_ref();