diff --git a/sycl/gdb/libsycl.so-gdb.py b/sycl/gdb/libsycl.so-gdb.py index 039e5cacf76e7..5b1eb18292e14 100644 --- a/sycl/gdb/libsycl.so-gdb.py +++ b/sycl/gdb/libsycl.so-gdb.py @@ -374,8 +374,8 @@ def range_common_array(self): class SYCLDevice(SYCLValue): """Provides information about a sycl::device from a gdb.Value.""" - IMPL_OFFSET_TO_DEVICE_TYPE = 0x8 - IMPL_OFFSET_TO_PLATFORM = 0x18 + IMPL_OFFSET_TO_DEVICE_TYPE = 0x18 + IMPL_OFFSET_TO_PLATFORM = 0x28 PLATFORM_OFFSET_TO_BACKEND = 0x20 def __init__(self, gdb_value): diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index cfbe9b8d339c5..e2657396bfa06 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -214,7 +214,6 @@ class HandlerAccess; class HostTask; using EventImplPtr = std::shared_ptr; -using DeviceImplPtr = std::shared_ptr; template static Arg member_ptr_helper(RetType (Func::*)(Arg) const); @@ -251,7 +250,7 @@ template struct get_kernel_wrapper_name_t { }; __SYCL_EXPORT device getDeviceFromHandler(handler &); -const DeviceImplPtr &getDeviceImplFromHandler(handler &); +device_impl &getDeviceImplFromHandler(handler &); // Checks if a device_global has any registered kernel usage. __SYCL_EXPORT bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr); @@ -3303,8 +3302,7 @@ class __SYCL_EXPORT handler { typename PropertyListT> friend class accessor; friend device detail::getDeviceFromHandler(handler &); - friend const detail::DeviceImplPtr & - detail::getDeviceImplFromHandler(handler &); + friend detail::device_impl &detail::getDeviceImplFromHandler(handler &); template diff --git a/sycl/source/backend/opencl.cpp b/sycl/source/backend/opencl.cpp index 011593d8a4480..263044c669023 100644 --- a/sycl/source/backend/opencl.cpp +++ b/sycl/source/backend/opencl.cpp @@ -65,10 +65,9 @@ __SYCL_EXPORT bool has_extension(const sycl::device &SyclDevice, "has_extension can only be used with an OpenCL backend"); } - std::shared_ptr DeviceImpl = - getSyclObjImpl(SyclDevice); - ur_device_handle_t AdapterDevice = DeviceImpl->getHandleRef(); - const AdapterPtr &Adapter = DeviceImpl->getAdapter(); + detail::device_impl &DeviceImpl = *getSyclObjImpl(SyclDevice); + ur_device_handle_t AdapterDevice = DeviceImpl.getHandleRef(); + const AdapterPtr &Adapter = DeviceImpl.getAdapter(); // Manual invocation of UR API to avoid using deprecated // info::device::extensions call. diff --git a/sycl/source/detail/allowlist.cpp b/sycl/source/detail/allowlist.cpp index e80773b5b14ac..b173fc45c85d5 100644 --- a/sycl/source/detail/allowlist.cpp +++ b/sycl/source/detail/allowlist.cpp @@ -396,7 +396,7 @@ void applyAllowList(std::vector &UrDevices, int InsertIDx = 0; for (ur_device_handle_t Device : UrDevices) { - auto DeviceImpl = PlatformImpl.getOrMakeDeviceImpl(Device); + device_impl &DeviceImpl = PlatformImpl.getOrMakeDeviceImpl(Device); // get DeviceType value and put it to DeviceDesc ur_device_type_t UrDevType = UR_DEVICE_TYPE_ALL; Adapter->call( @@ -429,20 +429,18 @@ void applyAllowList(std::vector &UrDevices, } // get DeviceVendorId value and put it to DeviceDesc uint32_t DeviceVendorIdUInt = - sycl::detail::get_device_info( - *DeviceImpl.get()); + sycl::detail::get_device_info(DeviceImpl); std::stringstream DeviceVendorIdHexStringStream; DeviceVendorIdHexStringStream << "0x" << std::hex << DeviceVendorIdUInt; const auto &DeviceVendorIdValue = DeviceVendorIdHexStringStream.str(); DeviceDesc[DeviceVendorIdKeyName] = DeviceVendorIdValue; // get DriverVersion value and put it to DeviceDesc const std::string &DriverVersionValue = - sycl::detail::get_device_info( - *DeviceImpl.get()); + sycl::detail::get_device_info(DeviceImpl); DeviceDesc[DriverVersionKeyName] = DriverVersionValue; // get DeviceName value and put it to DeviceDesc const std::string &DeviceNameValue = - sycl::detail::get_device_info(*DeviceImpl.get()); + sycl::detail::get_device_info(DeviceImpl); DeviceDesc[DeviceNameKeyName] = DeviceNameValue; // check if we can allow device with such device description DeviceDesc diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 91482b9197b6a..5e8e98ca231bb 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -284,19 +284,18 @@ KernelProgramCache &context_impl::getKernelProgramCache() const { return MKernelProgramCache; } -bool context_impl::hasDevice( - std::shared_ptr Device) const { +bool context_impl::hasDevice(const detail::device_impl &Device) const { for (auto D : MDevices) - if (getSyclObjImpl(D) == Device) + if (getSyclObjImpl(D).get() == &Device) return true; return false; } -DeviceImplPtr +device_impl * context_impl::findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const { for (device D : MDevices) if (getSyclObjImpl(D)->getHandleRef() == DeviceUR) - return getSyclObjImpl(D); + return getSyclObjImpl(D).get(); return nullptr; } @@ -356,10 +355,10 @@ std::vector context_impl::initializeDeviceGlobals( return {}; const AdapterPtr &Adapter = getAdapter(); - const DeviceImplPtr &DeviceImpl = QueueImpl->getDeviceImplPtr(); + device_impl &DeviceImpl = QueueImpl->getDeviceImpl(); std::lock_guard NativeProgramLock(MDeviceGlobalInitializersMutex); auto ImgIt = MDeviceGlobalInitializers.find( - std::make_pair(NativePrg, DeviceImpl->getHandleRef())); + std::make_pair(NativePrg, DeviceImpl.getHandleRef())); if (ImgIt == MDeviceGlobalInitializers.end() || ImgIt->second.MDeviceGlobalsFullyInitialized) return {}; @@ -461,12 +460,12 @@ void context_impl::DeviceGlobalInitializer::ClearEvents( } void context_impl::memcpyToHostOnlyDeviceGlobal( - const std::shared_ptr &DeviceImpl, const void *DeviceGlobalPtr, - const void *Src, size_t DeviceGlobalTSize, bool IsDeviceImageScoped, - size_t NumBytes, size_t Offset) { + device_impl &DeviceImpl, const void *DeviceGlobalPtr, const void *Src, + size_t DeviceGlobalTSize, bool IsDeviceImageScoped, size_t NumBytes, + size_t Offset) { std::optional KeyDevice = std::nullopt; if (IsDeviceImageScoped) - KeyDevice = DeviceImpl->getHandleRef(); + KeyDevice = DeviceImpl.getHandleRef(); auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice); std::lock_guard InitLock(MDeviceGlobalUnregisteredDataMutex); @@ -483,13 +482,12 @@ void context_impl::memcpyToHostOnlyDeviceGlobal( } void context_impl::memcpyFromHostOnlyDeviceGlobal( - const std::shared_ptr &DeviceImpl, void *Dest, - const void *DeviceGlobalPtr, bool IsDeviceImageScoped, size_t NumBytes, - size_t Offset) { + device_impl &DeviceImpl, void *Dest, const void *DeviceGlobalPtr, + bool IsDeviceImageScoped, size_t NumBytes, size_t Offset) { std::optional KeyDevice = std::nullopt; if (IsDeviceImageScoped) - KeyDevice = DeviceImpl->getHandleRef(); + KeyDevice = DeviceImpl.getHandleRef(); auto Key = std::make_pair(DeviceGlobalPtr, KeyDevice); std::lock_guard InitLock(MDeviceGlobalUnregisteredDataMutex); @@ -580,9 +578,8 @@ context_impl::get_default_memory_pool(const context &Context, assert(Kind == usm::alloc::device); - std::shared_ptr DevImpl = - sycl::detail::getSyclObjImpl(Device); - ur_device_handle_t DeviceHandle = DevImpl->getHandleRef(); + detail::device_impl &DevImpl = *detail::getSyclObjImpl(Device); + ur_device_handle_t DeviceHandle = DevImpl.getHandleRef(); const sycl::detail::AdapterPtr &Adapter = this->getAdapter(); // Check dev is already in our list of device pool pairs. diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 3e97296a2f9fc..adb372f46115b 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -150,33 +150,35 @@ class context_impl { KernelProgramCache &getKernelProgramCache() const; /// Returns true if and only if context contains the given device. - bool hasDevice(std::shared_ptr Device) const; + bool hasDevice(const detail::device_impl &Device) const; /// Returns true if and only if the device can be used within this context. /// For OpenCL this is currently equivalent to hasDevice, for other backends /// it returns true if the device is either a member of the context or a /// descendant of a member. - bool isDeviceValid(DeviceImplPtr Device) { - while (!hasDevice(Device)) { - if (Device->isRootDevice()) { - if (Device->has(aspect::ext_oneapi_is_component)) { + bool isDeviceValid(detail::device_impl &Device) { + detail::device_impl *CurrDevice = &Device; + while (!hasDevice(*CurrDevice)) { + if (CurrDevice->isRootDevice()) { + if (CurrDevice->has(aspect::ext_oneapi_is_component)) { // Component devices should be implicitly usable in context created // for a composite device they belong to. - auto CompositeDevice = Device->get_info< + auto CompositeDevice = CurrDevice->get_info< ext::oneapi::experimental::info::device::composite_device>(); - return hasDevice(detail::getSyclObjImpl(CompositeDevice)); + return hasDevice(*detail::getSyclObjImpl(CompositeDevice)); } return false; - } else if (Device->getBackend() == backend::opencl) { + } else if (CurrDevice->getBackend() == backend::opencl) { // OpenCL does not support using descendants of context members within // that context yet. We make the exception in case it supports // component/composite devices. // TODO remove once this limitation is lifted return false; } - Device = detail::getSyclObjImpl( - Device->get_info()); + CurrDevice = detail::getSyclObjImpl( + CurrDevice->get_info()) + .get(); } return true; @@ -190,7 +192,7 @@ class context_impl { /// Given a UR device, returns the matching shared_ptr /// within this context. May return nullptr if no match discovered. - DeviceImplPtr findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const; + device_impl *findMatchingDeviceImpl(ur_device_handle_t &DeviceUR) const; /// Gets the native handle of the SYCL context. /// @@ -216,16 +218,16 @@ class context_impl { initializeDeviceGlobals(ur_program_handle_t NativePrg, const std::shared_ptr &QueueImpl); - void memcpyToHostOnlyDeviceGlobal( - const std::shared_ptr &DeviceImpl, - const void *DeviceGlobalPtr, const void *Src, size_t DeviceGlobalTSize, - bool IsDeviceImageScoped, size_t NumBytes, size_t Offset); + void memcpyToHostOnlyDeviceGlobal(device_impl &DeviceImpl, + const void *DeviceGlobalPtr, + const void *Src, size_t DeviceGlobalTSize, + bool IsDeviceImageScoped, size_t NumBytes, + size_t Offset); - void - memcpyFromHostOnlyDeviceGlobal(const std::shared_ptr &DeviceImpl, - void *Dest, const void *DeviceGlobalPtr, - bool IsDeviceImageScoped, size_t NumBytes, - size_t Offset); + void memcpyFromHostOnlyDeviceGlobal(device_impl &DeviceImpl, void *Dest, + const void *DeviceGlobalPtr, + bool IsDeviceImageScoped, size_t NumBytes, + size_t Offset); /// Gets a program associated with a device global from the cache. std::optional diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index a097c778f034c..9c4fbadc58b6c 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -47,20 +47,18 @@ DeviceGlobalUSMMem &DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM( "USM allocations should not be acquired for device_global with " "device_image_scope property."); const std::shared_ptr &CtxImpl = QueueImpl->getContextImplPtr(); - const std::shared_ptr &DevImpl = QueueImpl->getDeviceImplPtr(); + const device_impl &DevImpl = QueueImpl->getDeviceImpl(); std::lock_guard Lock(MDeviceToUSMPtrMapMutex); - auto DGUSMPtr = MDeviceToUSMPtrMap.find({DevImpl.get(), CtxImpl.get()}); + auto DGUSMPtr = MDeviceToUSMPtrMap.find({&DevImpl, CtxImpl.get()}); if (DGUSMPtr != MDeviceToUSMPtrMap.end()) return DGUSMPtr->second; void *NewDGUSMPtr = detail::usm::alignedAllocInternal( - 0, MDeviceGlobalTSize, CtxImpl.get(), DevImpl.get(), - sycl::usm::alloc::device); + 0, MDeviceGlobalTSize, CtxImpl.get(), &DevImpl, sycl::usm::alloc::device); auto NewAllocIt = MDeviceToUSMPtrMap.emplace( - std::piecewise_construct, - std::forward_as_tuple(DevImpl.get(), CtxImpl.get()), + std::piecewise_construct, std::forward_as_tuple(&DevImpl, CtxImpl.get()), std::forward_as_tuple(NewDGUSMPtr)); assert(NewAllocIt.second && "USM allocation for device and context already happened."); diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 0c1db96e693a8..01b608023f0ce 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -707,12 +707,12 @@ class device_image_impl { getSyclObjImpl(MContext); for (const auto &SyclDev : Devices) { - const DeviceImplPtr &DevImpl = getSyclObjImpl(SyclDev); + device_impl &DevImpl = *getSyclObjImpl(SyclDev); if (!ContextImpl->hasDevice(DevImpl)) { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl->extOneapiCanBuild(MRTCBinInfo->MLanguage)) { + if (!DevImpl.extOneapiCanBuild(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in @@ -799,12 +799,12 @@ class device_image_impl { getSyclObjImpl(MContext); for (const auto &SyclDev : Devices) { - DeviceImplPtr DevImpl = getSyclObjImpl(SyclDev); + detail::device_impl &DevImpl = *getSyclObjImpl(SyclDev); if (!ContextImpl->hasDevice(DevImpl)) { throw sycl::exception(make_error_code(errc::invalid), "device not part of kernel_bundle context"); } - if (!DevImpl->extOneapiCanCompile(MRTCBinInfo->MLanguage)) { + if (!DevImpl.extOneapiCanCompile(MRTCBinInfo->MLanguage)) { // This error cannot not be exercised in the current implementation, as // compatibility with a source language depends on the backend's // capabilities and all devices in one context share the same backend in diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 0a3abbd4a470d..812072491d076 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -893,10 +893,10 @@ bool device_impl::extOneapiCanBuild( ext::oneapi::experimental::source_language Language) { try { // Get the shared_ptr to this object from the platform that owns it. - std::shared_ptr Self = MPlatform->getOrMakeDeviceImpl(MDevice); + device_impl &Self = MPlatform->getOrMakeDeviceImpl(MDevice); return sycl::ext::oneapi::experimental::detail:: is_source_kernel_bundle_supported(Language, - std::vector{Self}); + std::vector{&Self}); } catch (sycl::exception &) { return false; @@ -907,11 +907,11 @@ bool device_impl::extOneapiCanCompile( ext::oneapi::experimental::source_language Language) { try { // Currently only SYCL language is supported for compiling. - std::shared_ptr Self = MPlatform->getOrMakeDeviceImpl(MDevice); + device_impl &Self = MPlatform->getOrMakeDeviceImpl(MDevice); return Language == ext::oneapi::experimental::source_language::sycl && sycl::ext::oneapi::experimental::detail:: is_source_kernel_bundle_supported( - Language, std::vector{Self}); + Language, std::vector{&Self}); } catch (sycl::exception &) { return false; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index d3e8fb536736b..90185f04f3e64 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -32,7 +32,7 @@ namespace detail { class platform_impl; // TODO: Make code thread-safe -class device_impl { +class device_impl : public std::enable_shared_from_this { struct private_tag { explicit private_tag() = default; }; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 2e8c6abb24eae..2a5db00e26217 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -39,7 +39,7 @@ void event_impl::initContextIfNeeded() { const device SyclDevice; this->setContextImpl( - detail::queue_impl::getDefaultOrNew(detail::getSyclObjImpl(SyclDevice))); + detail::queue_impl::getDefaultOrNew(*detail::getSyclObjImpl(SyclDevice))); } event_impl::~event_impl() { @@ -442,8 +442,8 @@ event_impl::get_backend_info() const { "only be queried with an OpenCL backend"); } if (QueueImplPtr Queue = MQueue.lock()) { - return Queue->getDeviceImplPtr() - ->get_platform() + return Queue->getDeviceImpl() + .get_platform() .get_info(); } // If the queue has been released, no platform will be associated @@ -465,7 +465,7 @@ event_impl::get_backend_info() const { "be queried with an OpenCL backend"); } if (QueueImplPtr Queue = MQueue.lock()) { - return Queue->getDeviceImplPtr()->get_info(); + return Queue->getDeviceImpl().get_info(); } return ""; // If the queue has been released, no device will be associated so // return empty string @@ -590,7 +590,7 @@ void event_impl::setSubmissionTime() { if (!MFallbackProfiling) { if (QueueImplPtr Queue = MQueue.lock()) { try { - MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + MSubmitTime = Queue->getDeviceImpl().getCurrentDeviceTime(); } catch (sycl::exception &e) { if (e.code() == sycl::errc::feature_not_supported) throw sycl::exception( diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 1f72e17aac844..ff613823f1f04 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -784,7 +784,7 @@ void exec_graph_impl::findRealDeps( ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, - sycl::detail::DeviceImplPtr DeviceImpl, + sycl::detail::device_impl &DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, std::shared_ptr Node) { std::vector Deps; @@ -861,10 +861,10 @@ void exec_graph_impl::createCommandBuffers( Partition->MIsInOrderGraph && !MEnableProfiling, MEnableProfiling}; auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); - auto DeviceImpl = sycl::detail::getSyclObjImpl(Device); + sycl::detail::device_impl &DeviceImpl = *sycl::detail::getSyclObjImpl(Device); ur_result_t Res = Adapter->call_nocheck( - ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), &Desc, + ContextImpl->getHandleRef(), DeviceImpl.getHandleRef(), &Desc, &OutCommandBuffer); if (Res != UR_RESULT_SUCCESS) { throw sycl::exception(errc::invalid, "Failed to create UR command-buffer"); @@ -918,7 +918,7 @@ exec_graph_impl::exec_graph_impl(sycl::context Context, const property_list &PropList) : MSchedule(), MGraphImpl(GraphImpl), MSyncPoints(), MQueueImpl(std::make_shared( - sycl::detail::getSyclObjImpl(GraphImpl->getDevice()), + *sycl::detail::getSyclObjImpl(GraphImpl->getDevice()), sycl::detail::getSyclObjImpl(Context), sycl::async_handler{}, sycl::property_list{})), MDevice(GraphImpl->getDevice()), MContext(Context), MRequirements(), @@ -1462,7 +1462,8 @@ void exec_graph_impl::populateURKernelUpdateStructs( ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateDesc) const { auto ContextImpl = sycl::detail::getSyclObjImpl(MContext); const sycl::detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); - auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); + sycl::detail::device_impl &DeviceImpl = + *sycl::detail::getSyclObjImpl(MGraphImpl->getDevice()); // Gather arg information from Node auto &ExecCG = @@ -1515,7 +1516,7 @@ void exec_graph_impl::populateURKernelUpdateStructs( LocalSize = &NDRDesc.LocalSize[0]; else { Adapter->call( - UrKernel, DeviceImpl->getHandleRef(), + UrKernel, DeviceImpl.getHandleRef(), UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), RequiredWGSize, /* param_value_size_ret = */ nullptr); diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 32870e9062849..ccc6455f6296f 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -949,9 +949,7 @@ class graph_impl : public std::enable_shared_from_this { /// Query for the device_impl tied to this graph. /// @return device_impl shared ptr reference associated with graph. - const DeviceImplPtr &getDeviceImplPtr() const { - return getSyclObjImpl(MDevice); - } + device_impl &getDeviceImpl() const { return *getSyclObjImpl(MDevice); } /// Query for the device tied to this graph. /// @return Device associated with graph. @@ -1421,7 +1419,7 @@ class exec_graph_impl { /// @param Node The node being enqueued. /// @return UR sync point created for this node in the command-buffer. ur_exp_command_buffer_sync_point_t - enqueueNodeDirect(sycl::context Ctx, sycl::detail::DeviceImplPtr DeviceImpl, + enqueueNodeDirect(sycl::context Ctx, sycl::detail::device_impl &DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, std::shared_ptr Node); diff --git a/sycl/source/detail/graph_memory_pool.hpp b/sycl/source/detail/graph_memory_pool.hpp index be90c3b58892a..e9133bb7f7d0c 100644 --- a/sycl/source/detail/graph_memory_pool.hpp +++ b/sycl/source/detail/graph_memory_pool.hpp @@ -140,8 +140,8 @@ class graph_mem_pool { : address_access_mode::read_write; // Create physical memory - auto PhysicalMem = std::make_shared(MDevice, MContext, - AllocInfo.Size); + auto PhysicalMem = std::make_shared( + *getSyclObjImpl(MDevice), MContext, AllocInfo.Size); // Map the virtual reservation to it PhysicalMem->map(reinterpret_cast(Ptr), AllocInfo.Size, AccessMode, 0); diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index feb4f333a2c7f..58192ec89a036 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -40,10 +40,9 @@ markBufferAsInternal(const std::shared_ptr &BufImpl) { std::tuple retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, CGExecKernel *KernelCG) { - bool isNvidia = - Queue->getDeviceImplPtr()->getBackend() == backend::ext_oneapi_cuda; - bool isHIP = - Queue->getDeviceImplPtr()->getBackend() == backend::ext_oneapi_hip; + device_impl &Dev = Queue->getDeviceImpl(); + bool isNvidia = Dev.getBackend() == backend::ext_oneapi_cuda; + bool isHIP = Dev.getBackend() == backend::ext_oneapi_hip; if (isNvidia || isHIP) { auto KernelID = ProgramManager::getInstance().getSYCLKernelID(KernelName); std::vector KernelIds{KernelID}; @@ -61,11 +60,10 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, return {nullptr, nullptr}; } auto ContextImpl = Queue->getContextImplPtr(); - auto DeviceImpl = Queue->getDeviceImplPtr(); - auto Device = detail::createSyclObjFromImpl(DeviceImpl); + auto Device = detail::createSyclObjFromImpl(Dev); ur_program_handle_t Program = detail::ProgramManager::getInstance().createURProgram( - **DeviceImage, ContextImpl, {std::move(Device)}); + **DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); return {*DeviceImage, Program}; } @@ -84,12 +82,10 @@ retrieveKernelBinary(const QueueImplPtr &Queue, const char *KernelName, Program = SyclKernelImpl->getDeviceImage()->get_ur_program_ref(); } else { auto ContextImpl = Queue->getContextImplPtr(); - auto DeviceImpl = Queue->getDeviceImplPtr(); - auto Device = detail::createSyclObjFromImpl(DeviceImpl); DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( - KernelName, ContextImpl, DeviceImpl.get()); + KernelName, ContextImpl, &Dev); Program = detail::ProgramManager::getInstance().createURProgram( - *DeviceImage, ContextImpl, {std::move(Device)}); + *DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); } return {DeviceImage, Program}; } diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 6e0da2eb795aa..ffce162ecbab4 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -125,7 +125,7 @@ translateBinaryImageFormat(ur::DeviceBinaryType Type) { } static ::jit_compiler::BinaryFormat getTargetFormat(const QueueImplPtr &Queue) { - auto Backend = Queue->getDeviceImplPtr()->getBackend(); + auto Backend = Queue->getDeviceImpl().getBackend(); switch (Backend) { case backend::ext_oneapi_level_zero: case backend::opencl: diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index f208187523f69..eb0340fb938b7 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -34,14 +34,14 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental::detail { -using DeviceImplPtr = std::shared_ptr; +using namespace sycl::detail; bool is_source_kernel_bundle_supported( sycl::ext::oneapi::experimental::source_language Language, const context &Ctx); bool is_source_kernel_bundle_supported( sycl::ext::oneapi::experimental::source_language Language, - const std::vector &Devices); + const std::vector &Devices); } // namespace ext::oneapi::experimental::detail namespace detail { @@ -50,7 +50,7 @@ static bool checkAllDevicesAreInContext(const std::vector &Devices, const context &Context) { return std::all_of( Devices.begin(), Devices.end(), [&Context](const device &Dev) { - return getSyclObjImpl(Context)->isDeviceValid(getSyclObjImpl(Dev)); + return getSyclObjImpl(Context)->isDeviceValid(*getSyclObjImpl(Dev)); }); } diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp index a648a5596d4ee..c38fcfcd339b3 100644 --- a/sycl/source/detail/physical_mem_impl.hpp +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -37,14 +37,14 @@ inline ur_virtual_mem_access_flag_t AccessModeToVirtualAccessFlags( class physical_mem_impl { public: - physical_mem_impl(const device &SyclDevice, const context &SyclContext, + physical_mem_impl(device_impl &DeviceImpl, const context &SyclContext, size_t NumBytes) - : MDevice(getSyclObjImpl(SyclDevice)), - MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { + : MDevice(DeviceImpl), MContext(getSyclObjImpl(SyclContext)), + MNumBytes(NumBytes) { const AdapterPtr &Adapter = MContext->getAdapter(); auto Err = Adapter->call_nocheck( - MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, nullptr, + MContext->getHandleRef(), MDevice.getHandleRef(), MNumBytes, nullptr, &MPhysicalMem); if (Err == UR_RESULT_ERROR_OUT_OF_RESOURCES || @@ -82,7 +82,7 @@ class physical_mem_impl { private: ur_physical_mem_handle_t MPhysicalMem = nullptr; - const std::shared_ptr MDevice; + device_impl &MDevice; const std::shared_ptr MContext; const size_t MNumBytes; }; diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index 328f9925b44f5..7e9bed09ee044 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -289,26 +289,22 @@ platform_impl::filterDeviceFilter(std::vector &UrDevices, return original_indices; } -std::shared_ptr -platform_impl::getDeviceImpl(ur_device_handle_t UrDevice) { +device_impl *platform_impl::getDeviceImpl(ur_device_handle_t UrDevice) { const std::lock_guard Guard(MDeviceMapMutex); return getDeviceImplHelper(UrDevice); } -std::shared_ptr -platform_impl::getOrMakeDeviceImpl(ur_device_handle_t UrDevice) { +device_impl &platform_impl::getOrMakeDeviceImpl(ur_device_handle_t UrDevice) { const std::lock_guard Guard(MDeviceMapMutex); // If we've already seen this device, return the impl - std::shared_ptr Result = getDeviceImplHelper(UrDevice); - if (Result) - return Result; + if (device_impl *Result = getDeviceImplHelper(UrDevice)) + return *Result; // Otherwise make the impl - Result = std::make_shared(UrDevice, *this, - device_impl::private_tag{}); - MDevices.emplace_back(Result); + MDevices.emplace_back(std::make_shared( + UrDevice, *this, device_impl::private_tag{})); - return Result; + return *MDevices.back(); } static bool supportsAffinityDomain(const device &dev, @@ -635,11 +631,10 @@ bool platform_impl::has(aspect Aspect) const { return true; } -std::shared_ptr -platform_impl::getDeviceImplHelper(ur_device_handle_t UrDevice) { +device_impl *platform_impl::getDeviceImplHelper(ur_device_handle_t UrDevice) { for (const std::shared_ptr &Device : MDevices) { if (Device->getHandleRef() == UrDevice) - return Device; + return Device.get(); } return nullptr; } diff --git a/sycl/source/detail/platform_impl.hpp b/sycl/source/detail/platform_impl.hpp index e85a7cc8a7730..5914f0e547f02 100644 --- a/sycl/source/detail/platform_impl.hpp +++ b/sycl/source/detail/platform_impl.hpp @@ -159,8 +159,8 @@ class platform_impl : public std::enable_shared_from_this { /// /// \param UrDevice is the UrDevice whose impl is requested /// - /// \return a shared_ptr corresponding to the device - std::shared_ptr getDeviceImpl(ur_device_handle_t UrDevice); + /// \return a device_impl* corresponding to the device + device_impl *getDeviceImpl(ur_device_handle_t UrDevice); /// Queries the device_impl cache to either return a shared_ptr /// for the device_impl corresponding to the UrDevice or add @@ -170,8 +170,8 @@ class platform_impl : public std::enable_shared_from_this { /// /// \param PlatormImpl is the Platform for that Device /// - /// \return a shared_ptr corresponding to the device - std::shared_ptr getOrMakeDeviceImpl(ur_device_handle_t UrDevice); + /// \return a device_impl* corresponding to the device + device_impl &getOrMakeDeviceImpl(ur_device_handle_t UrDevice); /// Queries the cache to see if the specified UR platform has been seen /// before. If so, return the cached platform_impl, otherwise create a new @@ -200,7 +200,7 @@ class platform_impl : public std::enable_shared_from_this { bool MAlwaysRootDevice = false; private: - std::shared_ptr getDeviceImplHelper(ur_device_handle_t UrDevice); + device_impl *getDeviceImplHelper(ur_device_handle_t UrDevice); // Helper to get the vector of platforms supported by a given UR adapter static std::vector getAdapterPlatforms(AdapterPtr &Adapter, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index abae1d620c243..5812ac8059dba 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -368,8 +368,8 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, appendCompileOptionsForGRFSizeProperties(CompileOpts, Img, isEsimdImage); - const detail::DeviceImplPtr &DeviceImpl = detail::getSyclObjImpl(Devs[0]); - const platform_impl &PlatformImpl = DeviceImpl->getPlatformImpl(); + const platform_impl &PlatformImpl = + detail::getSyclObjImpl(Devs[0])->getPlatformImpl(); // Add optimization flags. auto str = getUint32PropAsOptStr(Img, "optLevel"); @@ -584,11 +584,10 @@ static const char *getUrDeviceTarget(const char *URDeviceTarget) { static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, const device &Dev) { - const std::shared_ptr &DeviceImpl = - detail::getSyclObjImpl(Dev); - auto &Adapter = DeviceImpl->getAdapter(); + detail::device_impl &DeviceImpl = *detail::getSyclObjImpl(Dev); + auto &Adapter = DeviceImpl.getAdapter(); - const ur_device_handle_t &URDeviceHandle = DeviceImpl->getHandleRef(); + const ur_device_handle_t &URDeviceHandle = DeviceImpl.getHandleRef(); // Call urDeviceSelectBinary with only one image to check if an image is // compatible with implementation. The function returns invalid index if no @@ -641,11 +640,10 @@ bool ProgramManager::isSpecialDeviceImageShouldBeUsed( // more devicelib images in this way. enum { DEVICELIB_FALLBACK = 0, DEVICELIB_NATIVE }; ur_bool_t NativeBF16Supported = false; - const std::shared_ptr &DeviceImpl = - detail::getSyclObjImpl(Dev); + detail::device_impl &DeviceImpl = *detail::getSyclObjImpl(Dev); ur_result_t CallSuccessful = - DeviceImpl->getAdapter()->call_nocheck( - DeviceImpl->getHandleRef(), + DeviceImpl.getAdapter()->call_nocheck( + DeviceImpl.getHandleRef(), UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE, sizeof(ur_bool_t), &NativeBF16Supported, nullptr); if (CallSuccessful != UR_RESULT_SUCCESS) { @@ -850,22 +848,24 @@ CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { // When caching is enabled, the returned UrProgram will already have // its ref count incremented. ur_program_handle_t ProgramManager::getBuiltURProgram( - const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, + const ContextImplPtr &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) { - DeviceImplPtr RootDevImpl; + device_impl *RootDevImpl; ur_bool_t MustBuildOnSubdevice = true; // Check if we can optimize program builds for sub-devices by using a program // built for the root device - if (!DeviceImpl->isRootDevice()) { - RootDevImpl = DeviceImpl; + if (!DeviceImpl.isRootDevice()) { + RootDevImpl = &DeviceImpl; while (!RootDevImpl->isRootDevice()) { - auto ParentDev = detail::getSyclObjImpl( - RootDevImpl->get_info()); + device_impl *ParentDev = + detail::getSyclObjImpl( + RootDevImpl->get_info()) + .get(); // Sharing is allowed within a single context only - if (!ContextImpl->hasDevice(ParentDev)) + if (!ContextImpl->hasDevice(*ParentDev)) break; - RootDevImpl = std::move(ParentDev); + RootDevImpl = ParentDev; } ContextImpl->getAdapter()->call( @@ -873,8 +873,8 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( sizeof(ur_bool_t), &MustBuildOnSubdevice, nullptr); } - auto Device = createSyclObjFromImpl( - MustBuildOnSubdevice == true ? DeviceImpl : RootDevImpl); + device Device = createSyclObjFromImpl( + MustBuildOnSubdevice == true ? DeviceImpl : *RootDevImpl); const RTDeviceBinaryImage &Img = getDeviceImage(KernelName, ContextImpl, getSyclObjImpl(Device).get()); @@ -1111,18 +1111,18 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( std::tuple ProgramManager::getOrCreateKernel(const ContextImplPtr &ContextImpl, - const DeviceImplPtr &DeviceImpl, + device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get() - << ", " << DeviceImpl.get() << ", " << KernelName << ")\n"; + << ", " << &DeviceImpl << ", " << KernelName << ")\n"; } using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT; KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); - ur_device_handle_t UrDevice = DeviceImpl->getHandleRef(); + ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); auto key = std::make_pair(UrDevice, KernelName); if (SYCLConfig::get()) { @@ -3320,8 +3320,8 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( std::cerr << ">>> Adding the kernel to the cache.\n"; const ContextImplPtr &ContextImpl = detail::getSyclObjImpl(Context); auto Program = createURProgram(Img, ContextImpl, {Device}); - auto DeviceImpl = detail::getSyclObjImpl(Device); - auto &Adapter = DeviceImpl->getAdapter(); + detail::device_impl &DeviceImpl = *detail::getSyclObjImpl(Device); + auto &Adapter = DeviceImpl.getAdapter(); UrFuncInfo programReleaseInfo; auto programRelease = programReleaseInfo.getFuncPtrFromModule(ur::getURLoaderLibrary()); @@ -3332,7 +3332,7 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( applyOptionsFromEnvironment(CompileOpts, LinkOpts); // No linking of extra programs reqruired. std::vector ExtraProgramsToLink; - std::vector Devs = {DeviceImpl->getHandleRef()}; + std::vector Devs = {DeviceImpl.getHandleRef()}; auto BuildProgram = build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, Devs, /*For non SPIR-V devices DeviceLibReqdMask is always 0*/ 0, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 726ea4f5909cb..af4b0602263b0 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -72,7 +72,6 @@ static constexpr uint32_t inline ITTSpecConstId = 0xFF747469; class context_impl; using ContextImplPtr = std::shared_ptr; class device_impl; -using DeviceImplPtr = std::shared_ptr; class queue_impl; class event_impl; // DeviceLibExt is shared between sycl runtime and sycl-post-link tool. @@ -177,7 +176,7 @@ class ProgramManager { /// \param Device the device for which the program is built /// \param KernelName the kernel's name ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, - const DeviceImplPtr &DeviceImpl, + device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc = {}); @@ -200,8 +199,7 @@ class ProgramManager { std::tuple - getOrCreateKernel(const ContextImplPtr &ContextImpl, - const DeviceImplPtr &DeviceImpl, + getOrCreateKernel(const ContextImplPtr &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index afeb596df15d4..8230f3a7f4906 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -164,7 +164,7 @@ event queue_impl::memset(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node::memset"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); xpti::addMetadata(TEvent, "value_set", Value); xpti::addMetadata(TEvent, "memory_size", Count); @@ -212,7 +212,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, SYCL_STREAM_NAME, "memory_transfer_node::memcpy"); PrepareNotify.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); + reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); xpti::addMetadata(TEvent, "dest_memory_ptr", reinterpret_cast(Dest)); @@ -691,11 +691,9 @@ void queue_impl::constructorNotification() { xpti::addMetadata(TEvent, "sycl_context", reinterpret_cast(MContext->getHandleRef())); - if (MDevice) { - xpti::addMetadata(TEvent, "sycl_device_name", MDevice->getDeviceName()); - xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice->getHandleRef())); - } + xpti::addMetadata(TEvent, "sycl_device_name", MDevice.getDeviceName()); + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "is_inorder", MIsInorder); xpti::addMetadata(TEvent, "queue_id", MQueueID); xpti::addMetadata(TEvent, "queue_handle", diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index fedf8c0d4ed51..0d09d05f15534 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -53,7 +53,6 @@ class graph_impl; namespace detail { using ContextImplPtr = std::shared_ptr; -using DeviceImplPtr = std::shared_ptr; /// Sets max number of queues supported by FPGA RT. static constexpr size_t MaxNumQueues = 256; @@ -79,13 +78,13 @@ class queue_impl { public: // \return a default context for the platform if it includes the device // passed and default contexts are enabled, a new context otherwise. - static ContextImplPtr getDefaultOrNew(const DeviceImplPtr &Device) { + static ContextImplPtr getDefaultOrNew(device_impl &Device) { if (!SYCLConfig::get()) return detail::getSyclObjImpl( context{createSyclObjFromImpl(Device), {}, {}}); - ContextImplPtr DefaultContext = detail::getSyclObjImpl( - Device->get_platform().khr_get_default_context()); + ContextImplPtr DefaultContext = + detail::getSyclObjImpl(Device.get_platform().khr_get_default_context()); if (DefaultContext->isDeviceValid(Device)) return DefaultContext; return detail::getSyclObjImpl( @@ -98,7 +97,7 @@ class queue_impl { /// to the queue. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, + queue_impl(device_impl &Device, const async_handler &AsyncHandler, const property_list &PropList) : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; @@ -111,7 +110,7 @@ class queue_impl { /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, + queue_impl(device_impl &Device, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), @@ -128,10 +127,10 @@ class queue_impl { "Queue cannot be constructed with both of " "discard_events and enable_profiling."); // fallback profiling support. See MFallbackProfiling - if (MDevice->has(aspect::queue_profiling)) { + if (MDevice.has(aspect::queue_profiling)) { // When urDeviceGetGlobalTimestamps is not supported, compute the // profiling time OpenCL version < 2.1 case - if (!getDeviceImplPtr()->isGetDeviceAndHostTimerSupported()) + if (!getDeviceImpl().isGetDeviceAndHostTimerSupported()) MFallbackProfiling = true; } else { throw sycl::exception(make_error_code(errc::feature_not_supported), @@ -182,40 +181,6 @@ class queue_impl { sycl::detail::optional getLastEvent(); -private: - void queue_impl_interop(ur_queue_handle_t UrQueue) { - if (has_property() && - has_property()) { - throw sycl::exception(make_error_code(errc::invalid), - "Queue cannot be constructed with both of " - "discard_events and enable_profiling."); - } - - MQueue = UrQueue; - - ur_device_handle_t DeviceUr{}; - const AdapterPtr &Adapter = getAdapter(); - // TODO catch an exception and put it to list of asynchronous exceptions - Adapter->call( - MQueue, UR_QUEUE_INFO_DEVICE, sizeof(DeviceUr), &DeviceUr, nullptr); - MDevice = MContext->findMatchingDeviceImpl(DeviceUr); - if (MDevice == nullptr) { - throw sycl::exception( - make_error_code(errc::invalid), - "Device provided by native Queue not found in Context."); - } - // The following commented section provides a guideline on how to use the - // TLS enabled mechanism to create a tracepoint and notify using XPTI. This - // is the prolog section and the epilog section will initiate the - // notification. -#if XPTI_ENABLE_INSTRUMENTATION - // Emit a trace event for queue creation; we currently do not get code - // location information, so all queueus will have the same UID with a - // different instance ID until this gets added. - constructorNotification(); -#endif - } - public: /// Constructs a SYCL queue from adapter interoperability handle. /// @@ -225,15 +190,7 @@ class queue_impl { /// \param AsyncHandler is a SYCL asynchronous exception handler. queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), - MIsInorder(has_property()), - MDiscardEvents( - has_property()), - MIsProfilingEnabled(has_property()), - MQueueID{ - MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { - queue_impl_interop(UrQueue); - } + : queue_impl(UrQueue, Context, AsyncHandler, {}) {} /// Constructs a SYCL queue from adapter interoperability handle. /// @@ -244,15 +201,47 @@ class queue_impl { /// \param PropList is the queue properties. queue_impl(ur_queue_handle_t UrQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler, const property_list &PropList) - : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), - MIsInorder(has_property()), + : MDevice([&]() -> device_impl & { + ur_device_handle_t DeviceUr{}; + const AdapterPtr &Adapter = Context->getAdapter(); + // TODO catch an exception and put it to list of asynchronous + // exceptions + Adapter->call( + UrQueue, UR_QUEUE_INFO_DEVICE, sizeof(DeviceUr), &DeviceUr, + nullptr); + device_impl *Device = Context->findMatchingDeviceImpl(DeviceUr); + if (Device == nullptr) { + throw sycl::exception( + make_error_code(errc::invalid), + "Device provided by native Queue not found in Context."); + } + return *Device; + }()), + MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), + MQueue(UrQueue), MIsInorder(has_property()), MDiscardEvents( has_property()), MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { verifyProps(PropList); - queue_impl_interop(UrQueue); + if (has_property() && + has_property()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue cannot be constructed with both of " + "discard_events and enable_profiling."); + } + + // The following commented section provides a guideline on how to use the + // TLS enabled mechanism to create a tracepoint and notify using XPTI. This + // is the prolog section and the epilog section will initiate the + // notification. +#if XPTI_ENABLE_INSTRUMENTATION + // Emit a trace event for queue creation; we currently do not get code + // location information, so all queueus will have the same UID with a + // different instance ID until this gets added. + constructorNotification(); +#endif } ~queue_impl() { @@ -299,7 +288,7 @@ class queue_impl { const ContextImplPtr &getContextImplPtr() const { return MContext; } - const DeviceImplPtr &getDeviceImplPtr() const { return MDevice; } + device_impl &getDeviceImpl() const { return MDevice; } /// \return an associated SYCL device. device get_device() const { return createSyclObjFromImpl(MDevice); } @@ -504,7 +493,7 @@ class queue_impl { ur_queue_handle_t createQueue(QueueOrder Order) { ur_queue_handle_t Queue{}; ur_context_handle_t Context = MContext->getHandleRef(); - ur_device_handle_t Device = MDevice->getHandleRef(); + ur_device_handle_t Device = MDevice.getHandleRef(); const AdapterPtr &Adapter = getAdapter(); /* sycl::detail::pi::PiQueueProperties Properties[] = { @@ -941,7 +930,7 @@ class queue_impl { /// Protects all the fields that can be changed by class' methods. mutable std::mutex MMutex; - DeviceImplPtr MDevice; + device_impl &MDevice; const ContextImplPtr MContext; /// These events are tracked, but not owned, by the queue. diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9201be0a318b0..4e44e9df241ac 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -421,7 +421,7 @@ class DispatchHostTask { assert(HostTask.MQueue && "Host task submissions should have an associated queue"); interop_handle IH{MReqToMem, HostTask.MQueue, - HostTask.MQueue->getDeviceImplPtr(), + HostTask.MQueue->getDeviceImpl().shared_from_this(), HostTask.MQueue->getContextImplPtr()}; // TODO: should all the backends that support this entry point use this // for host task? @@ -1458,7 +1458,7 @@ bool UnMapMemObject::producesPiEvent() const { // an event waitlist and Level Zero adapter attempts to batch these commands, // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. - return MQueue && (MQueue->getDeviceImplPtr()->getBackend() != + return MQueue && (MQueue->getDeviceImpl().getBackend() != backend::ext_oneapi_level_zero || MEvent->getHandle() != nullptr); } @@ -1565,7 +1565,7 @@ bool MemCpyCommand::producesPiEvent() const { // so the execution of kernel B starts only on step 4. This workaround // restores the old behavior in this case until this is resolved. return !MQueue || - MQueue->getDeviceImplPtr()->getBackend() != + MQueue->getDeviceImpl().getBackend() != backend::ext_oneapi_level_zero || MEvent->getHandle() != nullptr; } @@ -2013,7 +2013,7 @@ void instrumentationAddExtraKernelMetadata( // by graph API, when a modifiable graph is finalized. std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( - Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); + Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName); } applyFuncOnFilteredArgs(EliminatedArgMask, CGArgs, FilterArgs); @@ -2422,7 +2422,7 @@ static ur_result_t SetKernelParamsAndLaunch( Kernel, ImplicitLocalArg.value(), WorkGroupMemorySize, nullptr); } - adjustNDRangePerKernel(NDRDesc, Kernel, *(Queue->getDeviceImplPtr())); + adjustNDRangePerKernel(NDRDesc, Kernel, Queue->getDeviceImpl()); // Remember this information before the range dimensions are reversed const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); @@ -2436,7 +2436,7 @@ static ur_result_t SetKernelParamsAndLaunch( LocalSize = &NDRDesc.LocalSize[0]; else { Adapter->call( - Kernel, Queue->getDeviceImplPtr()->getHandleRef(), + Kernel, Queue->getDeviceImpl().getHandleRef(), UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), RequiredWGSize, /* pPropSizeRet = */ nullptr); @@ -2512,7 +2512,7 @@ namespace { std::tuple, const KernelArgMask *> getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, - DeviceImplPtr DeviceImpl, + device_impl &DeviceImpl, std::vector &UrKernelsToRelease, std::vector &UrProgramsToRelease) { @@ -2545,7 +2545,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, } // anonymous namespace ur_result_t enqueueImpCommandBufferKernel( - context Ctx, DeviceImplPtr DeviceImpl, + context Ctx, device_impl &DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector &SyncPoints, @@ -2610,7 +2610,7 @@ ur_result_t enqueueImpCommandBufferKernel( LocalSize = &NDRDesc.LocalSize[0]; else { Adapter->call( - UrKernel, DeviceImpl->getHandleRef(), + UrKernel, DeviceImpl.getHandleRef(), UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(RequiredWGSize), RequiredWGSize, /* pPropSizeRet = */ nullptr); @@ -2648,8 +2648,7 @@ ur_result_t enqueueImpCommandBufferKernel( } if (Res != UR_RESULT_SUCCESS) { - const device_impl &DeviceImplem = *(DeviceImpl); - detail::enqueue_kernel_launch::handleErrorOrWarning(Res, DeviceImplem, + detail::enqueue_kernel_launch::handleErrorOrWarning(Res, DeviceImpl, UrKernel, NDRDesc); } @@ -2669,7 +2668,7 @@ void enqueueImpKernel( assert(Queue && "Kernel submissions should have an associated queue"); // Run OpenCL kernel auto &ContextImpl = Queue->getContextImplPtr(); - auto &DeviceImpl = Queue->getDeviceImplPtr(); + device_impl &DeviceImpl = Queue->getDeviceImpl(); ur_kernel_handle_t Kernel = nullptr; std::mutex *KernelMutex = nullptr; ur_program_handle_t Program = nullptr; @@ -2760,7 +2759,7 @@ void enqueueImpKernel( if (UR_RESULT_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice // exception explaining what was wrong - detail::enqueue_kernel_launch::handleErrorOrWarning(Error, *DeviceImpl, + detail::enqueue_kernel_launch::handleErrorOrWarning(Error, DeviceImpl, Kernel, NDRDesc); } } @@ -2859,7 +2858,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { }; auto result = enqueueImpCommandBufferKernel( - MQueue->get_context(), MQueue->getDeviceImplPtr(), MCommandBuffer, + MQueue->get_context(), MQueue->getDeviceImpl(), MCommandBuffer, *ExecKernel, MSyncPointDeps, &OutSyncPoint, &OutCommand, getMemAllocationFunc); MEvent->setSyncPoint(OutSyncPoint); @@ -3002,7 +3001,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { // scheduler. const AdapterPtr &Adapter = MQueue->getAdapter(); auto ContextImpl = MQueue->getContextImplPtr(); - auto DeviceImpl = MQueue->getDeviceImplPtr(); + device_impl &DeviceImpl = MQueue->getDeviceImpl(); // The CUDA & HIP backends don't have the equivalent of barrier // commands that can be appended to the native UR command-buffer @@ -3018,7 +3017,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { // urCommandBufferAppendNativeCommandExp. ur_bool_t DeviceHasSubgraphSupport = false; Adapter->call( - DeviceImpl->getHandleRef(), + DeviceImpl.getHandleRef(), UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP, sizeof(ur_bool_t), &DeviceHasSubgraphSupport, nullptr); @@ -3030,7 +3029,7 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { false /* profilable*/ }; Adapter->call( - ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), &Desc, + ContextImpl->getHandleRef(), DeviceImpl.getHandleRef(), &Desc, &ChildCommandBuffer); } @@ -3064,7 +3063,8 @@ ur_result_t ExecCGCommand::enqueueImpCommandBuffer() { ur_exp_command_buffer_handle_t InteropCommandBuffer = ChildCommandBuffer ? ChildCommandBuffer : MCommandBuffer; - interop_handle IH{std::move(ReqToMem), MQueue, DeviceImpl, ContextImpl, + interop_handle IH{std::move(ReqToMem), MQueue, + DeviceImpl.shared_from_this(), ContextImpl, InteropCommandBuffer}; CommandBufferNativeCommandData CustomOpData{ std::move(IH), HostTask->MHostTask->MInteropTask}; @@ -3471,7 +3471,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { EnqueueNativeCommandData CustomOpData{ interop_handle{std::move(ReqToMem), HostTask->MQueue, - HostTask->MQueue->getDeviceImplPtr(), + HostTask->MQueue->getDeviceImpl().shared_from_this(), HostTask->MQueue->getContextImplPtr()}, HostTask->MHostTask->MInteropTask}; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index a602f7c4b373c..d6f439f536776 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -37,9 +37,6 @@ void emitInstrumentationGeneral(uint32_t StreamID, uint64_t InstanceID, xpti_td *TraceEvent, uint16_t Type, const void *Addr); #endif -RTDeviceBinaryImage * -retrieveAMDGCNOrNVPTXKernelBinary(const DeviceImplPtr DeviceImpl, - const std::string &KernelName); class queue_impl; class event_impl; @@ -731,7 +728,7 @@ class UpdateCommandBufferCommand : public Command { // Enqueues a given kernel to a ur_exp_command_buffer_handle_t ur_result_t enqueueImpCommandBufferKernel( - context Ctx, DeviceImplPtr DeviceImpl, + context Ctx, device_impl &DeviceImpl, ur_exp_command_buffer_handle_t CommandBuffer, const CGExecKernel &CommandGroup, std::vector &SyncPoints, diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 85bc93f7d6a9a..d4a2a3cef1251 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -216,7 +216,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, std::vector Devices = InteropCtxPtr->get_info(); assert(Devices.size() != 0); - DeviceImplPtr Dev = detail::getSyclObjImpl(Devices[0]); + device_impl &Dev = *detail::getSyclObjImpl(Devices[0]); // Since all the Scheduler commands require queue but we have only context // here, we need to create a dummy queue bound to the context and one of the diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index dea4fde1339d3..1ea02f73b3846 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -592,10 +592,10 @@ device get_pointer_device(const void *Ptr, const context &Ctxt) { // The device is not necessarily a member of the context, it could be a // member's descendant instead. Fetch the corresponding device from the cache. - std::shared_ptr DevImpl = - detail::getSyclObjImpl(Ctxt)->getPlatformImpl().getDeviceImpl(DeviceId); - if (DevImpl) - return detail::createSyclObjFromImpl(DevImpl); + if (detail::device_impl *DevImpl = + detail::getSyclObjImpl(Ctxt)->getPlatformImpl().getDeviceImpl( + DeviceId)) + return detail::createSyclObjFromImpl(*DevImpl); throw exception(make_error_code(errc::runtime), "Cannot find device associated with USM allocation!"); } diff --git a/sycl/source/device.cpp b/sycl/source/device.cpp index 31a7c1612d0e5..ca5a6f1b694fc 100644 --- a/sycl/source/device.cpp +++ b/sycl/source/device.cpp @@ -41,7 +41,8 @@ device::device(cl_device_id DeviceId) { detail::ur::cast(DeviceId), Adapter->getUrAdapter(), nullptr, &Device); impl = detail::platform_impl::getPlatformFromUrDevice(Device, Adapter) - .getOrMakeDeviceImpl(Device); + .getOrMakeDeviceImpl(Device) + .shared_from_this(); __SYCL_OCL_CALL(clRetainDevice, DeviceId); } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b4e440ff4cca1..46f0b4370bbb8 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -43,13 +43,13 @@ inline namespace _V1 { namespace detail { -const DeviceImplPtr &getDeviceImplFromHandler(handler &CGH) { +device_impl &getDeviceImplFromHandler(handler &CGH) { assert((CGH.MQueue || getSyclObjImpl(CGH)->MGraph) && "One of MQueue or MGraph should be nonnull!"); if (CGH.MQueue) - return CGH.MQueue->getDeviceImplPtr(); + return CGH.MQueue->getDeviceImpl(); - return getSyclObjImpl(CGH)->MGraph->getDeviceImplPtr(); + return getSyclObjImpl(CGH)->MGraph->getDeviceImpl(); } bool isDeviceGlobalUsedInKernel(const void *DeviceGlobalPtr) { @@ -1887,8 +1887,8 @@ void handler::verifyDeviceHasProgressGuarantee( using execution_scope = sycl::ext::oneapi::experimental::execution_scope; using forward_progress = sycl::ext::oneapi::experimental::forward_progress_guarantee; - auto deviceImplPtr = MQueue->getDeviceImplPtr(); - const bool supported = deviceImplPtr->supportsForwardProgress( + device_impl &deviceImpl = MQueue->getDeviceImpl(); + const bool supported = deviceImpl.supportsForwardProgress( guarantee, threadScope, coordinationScope); if (threadScope == execution_scope::work_group) { if (!supported) { @@ -1961,7 +1961,7 @@ backend handler::getDeviceBackend() const { if (impl->MGraph) return impl->MGraph->getDevice().get_backend(); else - return MQueue->getDeviceImplPtr()->getBackend(); + return MQueue->getDeviceImpl().getBackend(); } void handler::ext_intel_read_host_pipe(detail::string_view Name, void *Ptr, @@ -2015,18 +2015,17 @@ void handler::memcpyToHostOnlyDeviceGlobal(const void *DeviceGlobalPtr, size_t NumBytes, size_t Offset) { std::weak_ptr WeakContextImpl = MQueue->getContextImplPtr(); - std::weak_ptr WeakDeviceImpl = - MQueue->getDeviceImplPtr(); - host_task([=] { - // Capture context and device as weak to avoid keeping them alive for too - // long. If they are dead by the time this executes, the operation would not - // have been visible anyway. + detail::device_impl &Dev = MQueue->getDeviceImpl(); + host_task([=, &Dev] { + // Capture context as weak to avoid keeping it alive for too long. If it is + // dead by the time this executes, the operation would not have been visible + // anyway. Devices are alive till library shutdown so capturing a reference + // to one is fine. std::shared_ptr ContextImpl = WeakContextImpl.lock(); - std::shared_ptr DeviceImpl = WeakDeviceImpl.lock(); - if (ContextImpl && DeviceImpl) + if (ContextImpl) ContextImpl->memcpyToHostOnlyDeviceGlobal( - DeviceImpl, DeviceGlobalPtr, Src, DeviceGlobalTSize, - IsDeviceImageScoped, NumBytes, Offset); + Dev, DeviceGlobalPtr, Src, DeviceGlobalTSize, IsDeviceImageScoped, + NumBytes, Offset); }); } @@ -2036,12 +2035,13 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest, size_t NumBytes, size_t Offset) { const std::shared_ptr &ContextImpl = MQueue->getContextImplPtr(); - const std::shared_ptr &DeviceImpl = - MQueue->getDeviceImplPtr(); - host_task([=] { - // Unlike memcpy to device_global, we need to keep the context and device - // alive in the capture of this operation as we must be able to correctly - // copy the value to the user-specified pointer. + detail::device_impl &DeviceImpl = MQueue->getDeviceImpl(); + host_task([=, &DeviceImpl] { + // Unlike memcpy to device_global, we need to keep the context alive in the + // capture of this operation as we must be able to correctly copy the value + // to the user-specified pointer. Device is guaranteed to live until SYCL RT + // library shutdown (but even if it wasn't, alive conext has to guarantee + // alive device). ContextImpl->memcpyFromHostOnlyDeviceGlobal( DeviceImpl, Dest, DeviceGlobalPtr, IsDeviceImageScoped, NumBytes, Offset); @@ -2114,10 +2114,10 @@ void handler::setUserFacingNodeType(ext::oneapi::experimental::node_type Type) { } std::optional> handler::getMaxWorkGroups() { - const auto &DeviceImpl = detail::getDeviceImplFromHandler(*this); + device_impl &DeviceImpl = detail::getDeviceImplFromHandler(*this); std::array UrResult = {}; - auto Ret = DeviceImpl->getAdapter()->call_nocheck( - DeviceImpl->getHandleRef(), + auto Ret = DeviceImpl.getAdapter()->call_nocheck( + DeviceImpl.getHandleRef(), UrInfoCode< ext::oneapi::experimental::info::device::max_work_groups<3>>::value, sizeof(UrResult), &UrResult, nullptr); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index d4b5de19ea74a..6c567b52081d2 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -392,7 +392,7 @@ namespace detail { bool is_source_kernel_bundle_supported( sycl::ext::oneapi::experimental::source_language Language, - const std::vector &DeviceImplVec) { + const std::vector &DeviceImplVec) { backend BE = DeviceImplVec[0]->getBackend(); // Support is limited to the opencl and level_zero backends. bool BE_Acceptable = (BE == sycl::backend::ext_oneapi_level_zero) || @@ -413,10 +413,9 @@ bool is_source_kernel_bundle_supported( IPVersionVec.reserve(DeviceImplVec.size()); std::transform(DeviceImplVec.begin(), DeviceImplVec.end(), - std::back_inserter(IPVersionVec), - [&](const DeviceImplPtr &Impl) { + std::back_inserter(IPVersionVec), [&](device_impl *Dev) { uint32_t ipVersion = 0; - ur_device_handle_t DeviceHandle = Impl->getHandleRef(); + ur_device_handle_t DeviceHandle = Dev->getHandleRef(); Adapter->call( DeviceHandle, UR_DEVICE_INFO_IP_VERSION, sizeof(uint32_t), &ipVersion, nullptr); @@ -434,12 +433,12 @@ bool is_source_kernel_bundle_supported( sycl::ext::oneapi::experimental::source_language Language, const context &Ctx) { const std::vector Devices = Ctx.get_devices(); - std::vector DeviceImplVec; + std::vector DeviceImplVec; DeviceImplVec.reserve(Devices.size()); std::transform(Devices.begin(), Devices.end(), std::back_inserter(DeviceImplVec), [](const sycl::device &dev) { - return sycl::detail::getSyclObjImpl(dev); + return &*sycl::detail::getSyclObjImpl(dev); }); return is_source_kernel_bundle_supported(Language, DeviceImplVec); diff --git a/sycl/source/physical_mem.cpp b/sycl/source/physical_mem.cpp index d9d6073a68e89..9284927596592 100644 --- a/sycl/source/physical_mem.cpp +++ b/sycl/source/physical_mem.cpp @@ -21,7 +21,7 @@ physical_mem::physical_mem(const device &SyclDevice, const context &SyclContext, "Device does not support aspect::ext_oneapi_virtual_mem."); impl = std::make_shared( - SyclDevice, SyclContext, NumBytes); + *getSyclObjImpl(SyclDevice), SyclContext, NumBytes); } void *physical_mem::map(uintptr_t Ptr, size_t NumBytes, diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index ec2fe68792c51..4383bb4a2d49a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -62,21 +62,21 @@ queue::queue(const context &SyclContext, const device_selector &DeviceSelector, const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp); impl = std::make_shared( - detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), + *detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { impl = std::make_shared( - detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), + *detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); } queue::queue(const device &SyclDevice, const async_handler &AsyncHandler, const property_list &PropList) { impl = std::make_shared( - detail::getSyclObjImpl(SyclDevice), AsyncHandler, PropList); + *detail::getSyclObjImpl(SyclDevice), AsyncHandler, PropList); } queue::queue(const context &SyclContext, const device_selector &deviceSelector, @@ -420,7 +420,7 @@ event queue::memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, bool queue::device_has(aspect Aspect) const { // avoid creating sycl object from impl - return impl->getDeviceImplPtr()->has(Aspect); + return impl->getDeviceImpl().has(Aspect); } // TODO(#15184) Remove this function in the next ABI-breaking window. diff --git a/sycl/test/gdb/printers.cpp b/sycl/test/gdb/printers.cpp index 2b21341c411ca..7fcacb89aff1f 100644 --- a/sycl/test/gdb/printers.cpp +++ b/sycl/test/gdb/printers.cpp @@ -63,9 +63,9 @@ sycl::range<1> r(3); // CHECK: 32 | backend MBackend // CHECK: 0 | class sycl::detail::device_impl -// CHECK: 8 | ur_device_type_t MType -// CHECK: 24 | class std::shared_ptr MPlatform -// CHECK: 24 | element_type * _M_ptr +// CHECK: 24 | ur_device_type_t MType +// CHECK: 40 | class std::shared_ptr MPlatform +// CHECK: 40 | element_type * _M_ptr // DEVICE: 0 | class sycl::detail::AccessorImplDevice<1> // DEVICE: 0 | class sycl::id<1> Offset @@ -73,10 +73,7 @@ sycl::range<1> r(3); // DEVICE: 16 | class sycl::range<> MemRange // CHECK: 0 | class sycl::detail::queue_impl -// CHECK: 40 | class std::shared_ptr MDevice -// CHECK: 40 | class std::__shared_ptr (base) -// CHECK: 40 | class std::__shared_ptr_access (base) (empty) -// CHECK: 40 | element_type * _M_ptr +// CHECK: 40 | device_impl & MDevice // CHECK: 0 | class sycl::accessor // HOST: 0 | {{.*}} sycl::detail::AccessorImplHost{{.*}} impl diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 2316f7e5f8066..91cc890cc99fd 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -384,13 +384,13 @@ TEST(ImageRemoval, NativePrograms) { sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); auto ProgramA = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), - sycl::detail::getSyclObjImpl(Dev), + *sycl::detail::getSyclObjImpl(Dev), generateRefName("A", "Kernel")); auto ProgramB = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), - sycl::detail::getSyclObjImpl(Dev), + *sycl::detail::getSyclObjImpl(Dev), generateRefName("B", "Kernel")); std::ignore = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), - sycl::detail::getSyclObjImpl(Dev), + *sycl::detail::getSyclObjImpl(Dev), generateRefName("C", "Kernel")); EXPECT_EQ(PM.getNativePrograms().size(), diff --git a/sycl/unittests/program_manager/SubDevices.cpp b/sycl/unittests/program_manager/SubDevices.cpp index 39163a15c8f91..a577a94d9a04c 100644 --- a/sycl/unittests/program_manager/SubDevices.cpp +++ b/sycl/unittests/program_manager/SubDevices.cpp @@ -106,8 +106,8 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { rootDevice = sycl::detail::getSyclObjImpl(device)->getHandleRef(); // Initialize sub-devices sycl::detail::platform_impl &PltImpl = *sycl::detail::getSyclObjImpl(Plt); - auto subDev1 = PltImpl.getOrMakeDeviceImpl(urSubDev1); - auto subDev2 = PltImpl.getOrMakeDeviceImpl(urSubDev2); + sycl::detail::device_impl &subDev1 = PltImpl.getOrMakeDeviceImpl(urSubDev1); + sycl::detail::device_impl &subDev2 = PltImpl.getOrMakeDeviceImpl(urSubDev2); sycl::context Ctx{ {device, sycl::detail::createSyclObjFromImpl(subDev1), sycl::detail::createSyclObjFromImpl(subDev2)}}; diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 9d5795b3ddda7..55cc4f790353b 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -285,7 +285,7 @@ TEST(EliminatedArgMask, ReuseOfHandleValues) { sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); ProgBefore = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), - sycl::detail::getSyclObjImpl(Dev), Name); + *sycl::detail::getSyclObjImpl(Dev), Name); auto Mask = PM.getEliminatedKernelArgMask(ProgBefore, Name); EXPECT_NE(Mask, nullptr); EXPECT_EQ(Mask->at(0), 1); @@ -310,7 +310,7 @@ TEST(EliminatedArgMask, ReuseOfHandleValues) { sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); ProgAfter = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), - sycl::detail::getSyclObjImpl(Dev), Name); + *sycl::detail::getSyclObjImpl(Dev), Name); auto Mask = PM.getEliminatedKernelArgMask(ProgAfter, Name); EXPECT_NE(Mask, nullptr); EXPECT_EQ(Mask->at(0), 0); diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index 9dbd9e0b34346..fda6e32ba281d 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -21,13 +21,12 @@ namespace { using namespace sycl; using EventImplPtr = std::shared_ptr; using ContextImplPtr = std::shared_ptr; -using DeviceImplPtr = std::shared_ptr; constexpr auto DisableCleanupName = "SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP"; class TestQueueImpl : public sycl::detail::queue_impl { public: - TestQueueImpl(ContextImplPtr SyclContext, DeviceImplPtr Dev) + TestQueueImpl(ContextImplPtr SyclContext, sycl::detail::device_impl &Dev) : sycl::detail::queue_impl(Dev, SyclContext, SyclContext->get_async_handler(), {}) {} using sycl::detail::queue_impl::MDefaultGraphDeps; @@ -47,7 +46,7 @@ class BarrierHandlingWithHostTask : public ::testing::Test { sycl::detail::select_device(sycl::default_selector_v, SyclContext); QueueDevImpl.reset( new TestQueueImpl(sycl::detail::getSyclObjImpl(SyclContext), - sycl::detail::getSyclObjImpl(SyclDev))); + *sycl::detail::getSyclObjImpl(SyclDev))); MainLock.lock(); } diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 7fca8a4184541..2cb53e082d6df 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -22,7 +22,7 @@ using ::testing::An; class MockQueueImpl : public sycl::detail::queue_impl { public: - MockQueueImpl(const sycl::detail::DeviceImplPtr &Device, + MockQueueImpl(sycl::detail::device_impl &Device, const sycl::async_handler &AsyncHandler, const sycl::property_list &PropList) : sycl::detail::queue_impl(Device, AsyncHandler, PropList) {} @@ -77,7 +77,7 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { const sycl::device Dev = Plt.get_devices()[0]; auto Queue = std::make_shared( - sycl::detail::getSyclObjImpl(Dev), sycl::async_handler{}, + *sycl::detail::getSyclObjImpl(Dev), sycl::async_handler{}, sycl::property::queue::in_order()); // Check that tasks submitted to an in-order queue implicitly depend_on the