diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 6a7a173bea915..83740b9b64f25 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -573,6 +573,7 @@ class __SYCL_EXPORT AccessorBaseHost { const range<3> &getMemoryRange() const; void *getPtr() const noexcept; bool isPlaceholder() const; + bool isMemoryObjectUsedByGraph() const; detail::AccHostDataT &getAccData(); @@ -1487,6 +1488,18 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : typename std::iterator_traits::difference_type; using size_type = std::size_t; + /// If creating a host_accessor this checks to see if the underlying memory + /// object is currently in use by a command_graph, and throws if it is. + void throwIfUsedByGraph() const { +#ifndef __SYCL_DEVICE_ONLY__ + if (IsHostBuf && AccessorBaseHost::isMemoryObjectUsedByGraph()) { + throw sycl::exception(make_error_code(errc::invalid), + "Host accessors cannot be created for buffers " + "which are currently in use by a command graph."); + } +#endif + } + // The list of accessor constructors with their arguments // -------+---------+-------+----+-----+-------------- // Dimensions = 0 @@ -1566,6 +1579,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1605,6 +1619,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), AdjustedDim, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1640,6 +1655,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); @@ -1676,6 +1692,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); @@ -1708,6 +1725,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1743,6 +1761,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -1805,6 +1824,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); initHostAcc(); @@ -1839,6 +1859,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); initHostAcc(); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); @@ -2014,6 +2035,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -2056,6 +2078,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), IsPlaceH, BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (!AccessorBaseHost::isPlaceholder()) addHostAccessorAndWait(AccessorBaseHost::impl.get()); @@ -2127,6 +2150,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -2169,6 +2193,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { + throwIfUsedByGraph(); preScreenAccessor(PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp index a76efb84304fc..81cf668f523fc 100644 --- a/sycl/include/sycl/detail/property_helper.hpp +++ b/sycl/include/sycl/detail/property_helper.hpp @@ -44,8 +44,10 @@ enum DataLessPropKind { GraphNoCycleCheck = 19, QueueSubmissionBatched = 20, QueueSubmissionImmediate = 21, + GraphAssumeDataOutlivesBuffer = 22, + GraphAssumeBufferOutlivesGraph = 23, // Indicates the last known dataless property. - LastKnownDataLessPropKind = 21, + LastKnownDataLessPropKind = 23, // Exceeding 32 may cause ABI breaking change on some of OSes. DataLessPropKindSize = 32 }; diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index 45618c2793543..94ce494337d8c 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -101,13 +101,22 @@ namespace graph { /// Property passed to command_graph constructor to disable checking for cycles. /// -/// \todo Cycle check not yet implemented. class no_cycle_check : public ::sycl::detail::DataLessProperty< ::sycl::detail::GraphNoCycleCheck> { public: no_cycle_check() = default; }; +/// Property passed to command_graph constructor to allow buffers to be used +/// with graphs. Passing this property represents a promise from the user that +/// the buffer will outlive any graph that it is used in. +/// +class assume_buffer_outlives_graph + : public ::sycl::detail::DataLessProperty< + ::sycl::detail::GraphAssumeBufferOutlivesGraph> { +public: + assume_buffer_outlives_graph() = default; +}; } // namespace graph namespace node { diff --git a/sycl/include/sycl/info/ext_oneapi_device_traits.def b/sycl/include/sycl/info/ext_oneapi_device_traits.def index 07e2342cad1e7..c91c52b03474f 100644 --- a/sycl/include/sycl/info/ext_oneapi_device_traits.def +++ b/sycl/include/sycl/info/ext_oneapi_device_traits.def @@ -11,7 +11,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, architecture, PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION) __SYCL_PARAM_TRAITS_SPEC( ext::oneapi::experimental, device, graph_support, - ext::oneapi::experimental::info::graph_support_level, + ext::oneapi::experimental::graph_support_level, 0 /* No PI device code needed */) // Bindless images pitched allocation diff --git a/sycl/include/sycl/info/info_desc.hpp b/sycl/include/sycl/info/info_desc.hpp index d8589f4d13f87..9f9c5da9f50dd 100644 --- a/sycl/include/sycl/info/info_desc.hpp +++ b/sycl/include/sycl/info/info_desc.hpp @@ -191,14 +191,14 @@ template struct compatibility_param_traits {}; } /*namespace info */ \ } /*namespace Namespace */ -namespace ext::oneapi::experimental::info { +namespace ext::oneapi::experimental { -enum class graph_support_level { unsupported = 0, native, emulated }; +enum class graph_support_level { unsupported = 0, native = 1, emulated = 2 }; -namespace device { +namespace info::device { template struct max_work_groups; -} // namespace device -} // namespace ext::oneapi::experimental::info +} // namespace info::device +} // namespace ext::oneapi::experimental #include #include #include diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index 50239b51ea9b5..c28d312296a2d 100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include +#include #include namespace sycl { @@ -94,6 +95,10 @@ void *AccessorBaseHost::getMemoryObject() const { return impl->MSYCLMemObj; } bool AccessorBaseHost::isPlaceholder() const { return impl->MIsPlaceH; } +bool AccessorBaseHost::isMemoryObjectUsedByGraph() const { + return static_cast(impl->MSYCLMemObj)->isUsedInGraph(); +} + LocalAccessorBaseHost::LocalAccessorBaseHost( sycl::range<3> Size, int Dims, int ElemSize, const property_list &PropertyList) { diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index df1c826af4253..0edd778112a9d 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -934,16 +934,16 @@ struct get_device_info_impl< // Specialization for graph extension support template <> struct get_device_info_impl< - ext::oneapi::experimental::info::graph_support_level, + ext::oneapi::experimental::graph_support_level, ext::oneapi::experimental::info::device::graph_support> { - static ext::oneapi::experimental::info::graph_support_level + static ext::oneapi::experimental::graph_support_level get(const DeviceImplPtr &Dev) { size_t ResultSize = 0; Dev->getPlugin()->call( Dev->getHandleRef(), PI_DEVICE_INFO_EXTENSIONS, 0, nullptr, &ResultSize); if (ResultSize == 0) - return ext::oneapi::experimental::info::graph_support_level::unsupported; + return ext::oneapi::experimental::graph_support_level::unsupported; std::unique_ptr Result(new char[ResultSize]); Dev->getPlugin()->call( @@ -954,9 +954,8 @@ struct get_device_info_impl< bool CmdBufferSupport = ExtensionsString.find("ur_exp_command_buffer") != std::string::npos; return CmdBufferSupport - ? ext::oneapi::experimental::info::graph_support_level::native - : ext::oneapi::experimental::info::graph_support_level:: - unsupported; + ? ext::oneapi::experimental::graph_support_level::native + : ext::oneapi::experimental::graph_support_level::unsupported; } }; @@ -1862,10 +1861,10 @@ inline uint32_t get_device_info_host< } template <> -inline ext::oneapi::experimental::info::graph_support_level +inline ext::oneapi::experimental::graph_support_level get_device_info_host() { // No support for graphs on the host device. - return ext::oneapi::experimental::info::graph_support_level::unsupported; + return ext::oneapi::experimental::graph_support_level::unsupported; } template <> diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index aa51eb9d0c44c..fdc59b45cb6d2 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,40 @@ bool checkForRequirement(sycl::detail::AccessorImplHost *Req, return SuccessorAddedDep; } +/// Visits a node on the graph and it's successors recursively in a depth-first +/// approach. +/// @param[in] Node The current node being visited. +/// @param[in,out] VisitedNodes A set of unique nodes which have already been +/// visited. +/// @param[in] NodeStack Stack of nodes which are currently being visited on the +/// current path through the graph. +/// @param[in] NodeFunc The function object to be run on each node. A return +/// value of true indicates the search should be ended immediately and the +/// function will return. +/// @return True if the search should end immediately, false if not. +bool visitNodeDepthFirst( + std::shared_ptr Node, + std::set> &VisitedNodes, + std::deque> &NodeStack, + std::function &, + std::deque> &)> + NodeFunc) { + auto EarlyReturn = NodeFunc(Node, NodeStack); + if (EarlyReturn) { + return true; + } + NodeStack.push_back(Node); + Node->MVisited = true; + VisitedNodes.emplace(Node); + for (auto &Successor : Node->MSuccessors) { + if (visitNodeDepthFirst(Successor, VisitedNodes, NodeStack, NodeFunc)) { + return true; + } + } + NodeStack.pop_back(); + return false; +} + void duplicateNode(const std::shared_ptr Node, std::shared_ptr &NodeCopy) { if (Node->MCGType == sycl::detail::CG::None) { @@ -92,6 +127,13 @@ void exec_graph_impl::schedule() { } } +graph_impl::~graph_impl() { + clearQueues(); + for (auto &MemObj : MMemObjs) { + MemObj->markNoLongerBeingUsedInGraph(); + } +} + std::shared_ptr graph_impl::addNodesToExits( const std::list> &NodeList) { // Find all input and output nodes from the node list @@ -219,7 +261,20 @@ graph_impl::add(sycl::detail::CG::CGTYPE CGType, // A unique set of dependencies obtained by checking requirements and events std::set> UniqueDeps; const auto &Requirements = CommandGroup->getRequirements(); + if (!MAllowBuffers && Requirements.size()) { + throw sycl::exception(make_error_code(errc::invalid), + "Cannot use buffers in a graph without passing the " + "assume_buffer_outlives_graph property on " + "Graph construction."); + } + for (auto &Req : Requirements) { + // Track and mark the memory objects being used by the graph. + auto MemObj = static_cast(Req->MSYCLMemObj); + bool WasInserted = MMemObjs.insert(MemObj).second; + if (WasInserted) { + MemObj->markBeingUsedInGraph(); + } // Look through the graph for nodes which share this requirement for (auto &NodePtr : MRoots) { checkForRequirement(Req, NodePtr, UniqueDeps); @@ -259,8 +314,8 @@ graph_impl::add(sycl::detail::CG::CGTYPE CGType, bool graph_impl::clearQueues() { bool AnyQueuesCleared = false; for (auto &Queue : MRecordingQueues) { - if (Queue) { - Queue->setCommandGraph(nullptr); + if (auto ValidQueue = Queue.lock(); ValidQueue) { + ValidQueue->setCommandGraph(nullptr); AnyQueuesCleared = true; } } @@ -269,6 +324,101 @@ bool graph_impl::clearQueues() { return AnyQueuesCleared; } +void graph_impl::searchDepthFirst( + std::function &, + std::deque> &)> + NodeFunc) { + // Track nodes visited during the search which can be used by NodeFunc in + // depth first search queries. Currently unusued but is an + // integral part of depth first searches. + std::set> VisitedNodes; + + for (auto &Root : MRoots) { + std::deque> NodeStack; + if (visitNodeDepthFirst(Root, VisitedNodes, NodeStack, NodeFunc)) { + break; + } + } + + // Reset the visited status of all nodes encountered in the search. + for (auto &Node : VisitedNodes) { + Node->MVisited = false; + } +} + +bool graph_impl::checkForCycles() { + // Using a depth-first search and checking if we vist a node more than once in + // the current path to identify if there are cycles. + bool CycleFound = false; + auto CheckFunc = [&](std::shared_ptr &Node, + std::deque> &NodeStack) { + // If the current node has previously been found in the current path through + // the graph then we have a cycle and we end the search early. + if (std::find(NodeStack.begin(), NodeStack.end(), Node) != + NodeStack.end()) { + CycleFound = true; + return true; + } + return false; + }; + searchDepthFirst(CheckFunc); + return CycleFound; +} + +void graph_impl::makeEdge(std::shared_ptr Src, + std::shared_ptr Dest) { + throwIfGraphRecordingQueue("make_edge()"); + if (Src == Dest) { + throw sycl::exception( + make_error_code(sycl::errc::invalid), + "make_edge() cannot be called when Src and Dest are the same."); + } + + bool SrcFound = false; + bool DestFound = false; + auto CheckForNodes = [&](std::shared_ptr &Node, + std::deque> &) { + if (Node == Src) { + SrcFound = true; + } + if (Node == Dest) { + DestFound = true; + } + return SrcFound && DestFound; + }; + + searchDepthFirst(CheckForNodes); + + if (!SrcFound) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Src must be a node inside the graph."); + } + if (!DestFound) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Dest must be a node inside the graph."); + } + + // We need to add the edges first before checking for cycles + Src->registerSuccessor(Dest, Src); + + // We can skip cycle checks if either Dest has no successors (cycle not + // possible) or cycle checks have been disabled with the no_cycle_check + // property; + if (Dest->MSuccessors.empty() || !MSkipCycleChecks) { + bool CycleFound = checkForCycles(); + + if (CycleFound) { + // Remove the added successor and predecessor + Src->MSuccessors.pop_back(); + Dest->MPredecessors.pop_back(); + + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Command graphs cannot contain cycles."); + } + } + removeRoot(Dest); // remove receiver from root node list +} + // Check if nodes are empty and if so loop back through predecessors until we // find the real dependency. void exec_graph_impl::findRealDeps( @@ -379,6 +529,11 @@ void exec_graph_impl::createCommandBuffers(sycl::device Device) { MRequirements.insert(MRequirements.end(), Node->MCommandGroup->getRequirements().begin(), Node->MCommandGroup->getRequirements().end()); + // Also store the actual accessor to make sure they are kept alive when + // commands are submitted + MAccessors.insert(MAccessors.end(), + Node->MCommandGroup->getAccStorage().begin(), + Node->MCommandGroup->getAccStorage().end()); } Res = @@ -438,6 +593,9 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, // handler. CGData.MRequirements.insert(CGData.MRequirements.end(), MRequirements.begin(), MRequirements.end()); + CGData.MAccStorage.insert(CGData.MAccStorage.end(), MAccessors.begin(), + MAccessors.end()); + // If we have no requirements or dependent events for the command buffer, // enqueue it directly if (CGData.MRequirements.empty() && CGData.MEvents.empty()) { @@ -525,10 +683,12 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, modifiable_command_graph::modifiable_command_graph( const sycl::context &SyclContext, const sycl::device &SyclDevice, - const sycl::property_list &) - : impl(std::make_shared(SyclContext, SyclDevice)) {} + const sycl::property_list &PropList) + : impl(std::make_shared(SyclContext, SyclDevice, + PropList)) {} node modifiable_command_graph::addImpl(const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; for (auto &D : Deps) { DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); @@ -541,6 +701,7 @@ node modifiable_command_graph::addImpl(const std::vector &Deps) { node modifiable_command_graph::addImpl(std::function CGF, const std::vector &Deps) { + impl->throwIfGraphRecordingQueue("Explicit API \"Add()\" function"); std::vector> DepImpls; for (auto &D : Deps) { DepImpls.push_back(sycl::detail::getSyclObjImpl(D)); @@ -559,9 +720,7 @@ void modifiable_command_graph::make_edge(node &Src, node &Dest) { sycl::detail::getSyclObjImpl(Dest); graph_impl::WriteLock Lock(impl->MMutex); - SenderImpl->registerSuccessor(ReceiverImpl, - SenderImpl); // register successor - impl->removeRoot(ReceiverImpl); // remove receiver from root node list + impl->makeEdge(SenderImpl, ReceiverImpl); } command_graph @@ -675,7 +834,7 @@ void executable_command_graph::finalizeImpl() { bool CmdBufSupport = Device.get_info< ext::oneapi::experimental::info::device::graph_support>() == - info::graph_support_level::native; + graph_support_level::native; #if FORCE_EMULATION_MODE // Above query should still succeed in emulation mode, but ignore the diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index c2e9dcb8921c0..77c50e29b2eb2 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,10 @@ namespace sycl { inline namespace _V1 { +namespace detail { +class SYCLMemObjT; +} + namespace ext { namespace oneapi { namespace experimental { @@ -44,6 +49,9 @@ class node_impl { /// Command group object which stores all args etc needed to enqueue the node std::unique_ptr MCommandGroup; + /// Used for tracking visited status during cycle checks. + bool MVisited = false; + /// Add successor to the node. /// @param Node Node to add as a successor. /// @param Prev Predecessor to \p node being added as successor. @@ -52,6 +60,10 @@ class node_impl { /// use a raw \p this pointer, so the extra \Prev parameter is passed. void registerSuccessor(const std::shared_ptr &Node, const std::shared_ptr &Prev) { + if (std::find(MSuccessors.begin(), MSuccessors.end(), Node) != + MSuccessors.end()) { + return; + } MSuccessors.push_back(Node); Node->registerPredecessor(Prev); } @@ -59,6 +71,12 @@ class node_impl { /// Add predecessor to the node. /// @param Node Node to add as a predecessor. void registerPredecessor(const std::shared_ptr &Node) { + if (std::find_if(MPredecessors.begin(), MPredecessors.end(), + [&Node](const std::weak_ptr &Ptr) { + return Ptr.lock() == Node; + }) != MPredecessors.end()) { + return; + } MPredecessors.push_back(Node); } @@ -279,9 +297,32 @@ class graph_impl { /// Constructor. /// @param SyclContext Context to use for graph. /// @param SyclDevice Device to create nodes with. - graph_impl(const sycl::context &SyclContext, const sycl::device &SyclDevice) + /// @param PropList Optional list of properties. + graph_impl(const sycl::context &SyclContext, const sycl::device &SyclDevice, + const sycl::property_list &PropList = {}) : MContext(SyclContext), MDevice(SyclDevice), MRecordingQueues(), - MEventsMap(), MInorderQueueMap() {} + MEventsMap(), MInorderQueueMap() { + if (PropList.has_property()) { + MSkipCycleChecks = true; + } + if (PropList + .has_property()) { + MAllowBuffers = true; + } + + if (SyclDevice.get_info< + ext::oneapi::experimental::info::device::graph_support>() == + graph_support_level::unsupported) { + std::stringstream Stream; + Stream << SyclDevice.get_backend(); + std::string BackendString = Stream.str(); + throw sycl::exception( + sycl::make_error_code(errc::invalid), + BackendString + " backend is not supported by SYCL Graph extension."); + } + } + + ~graph_impl(); /// Remove node from list of root nodes. /// @param Root Node to remove from list of root nodes. @@ -409,6 +450,26 @@ class graph_impl { MInorderQueueMap[QueueWeakPtr] = Node; } + /// Make an edge between two nodes in the graph. Performs some mandatory + /// error checks as well as an optional check for cycles introduced by making + /// this edge. + /// @param Src The source of the new edge. + /// @param Dest The destination of the new edge. + void makeEdge(std::shared_ptr Src, + std::shared_ptr Dest); + + /// Throws an invalid exception if this function is called + /// while a queue is recording commands to the graph. + /// @param ExceptionMsg Message to append to the exception message + void throwIfGraphRecordingQueue(const std::string ExceptionMsg) const { + if (MRecordingQueues.size()) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + ExceptionMsg + + " cannot be called when a queue " + "is currently recording commands to a graph."); + } + } + /// Checks if the graph_impl of Graph has a similar structure to /// the graph_impl of the caller. /// Graphs are considered similar if they have same numbers of nodes @@ -499,13 +560,42 @@ class graph_impl { } private: + /// Iterate over the graph depth-first and run \p NodeFunc on each node. + /// @param NodeFunc A function which receives as input a node in the graph to + /// perform operations on as well as the stack of nodes encountered in the + /// current path. The return value of this function determines whether an + /// early exit is triggered, if true the depth-first search will end + /// immediately and no further nodes will be visited. + void + searchDepthFirst(std::function &, + std::deque> &)> + NodeFunc); + + /// Check the graph for cycles by performing a depth-first search of the + /// graph. If a node is visited more than once in a given path through the + /// graph, a cycle is present and the search ends immediately. + /// @return True if a cycle is detected, false if not. + bool checkForCycles(); + + /// Insert node into list of root nodes. + /// @param Root Node to add to list of root nodes. + void addRoot(const std::shared_ptr &Root); + + /// Adds nodes to the exit nodes of this graph. + /// @param NodeList List of nodes from sub-graph in schedule order. + /// @return An empty node is used to schedule dependencies on this sub-graph. + std::shared_ptr + addNodesToExits(const std::list> &NodeList); + /// Context associated with this graph. sycl::context MContext; /// Device associated with this graph. All graph nodes will execute on this /// device. sycl::device MDevice; /// Unique set of queues which are currently recording to this graph. - std::set> MRecordingQueues; + std::set, + std::owner_less>> + MRecordingQueues; /// Map of events to their associated recorded nodes. std::unordered_map, std::shared_ptr> @@ -516,16 +606,15 @@ class graph_impl { std::map, std::shared_ptr, std::owner_less>> MInorderQueueMap; - - /// Insert node into list of root nodes. - /// @param Root Node to add to list of root nodes. - void addRoot(const std::shared_ptr &Root); - - /// Adds nodes to the exit nodes of this graph. - /// @param NodeList List of nodes from sub-graph in schedule order. - /// @return An empty node is used to schedule dependencies on this sub-graph. - std::shared_ptr - addNodesToExits(const std::list> &NodeList); + /// Controls whether we skip the cycle checks in makeEdge, set by the presence + /// of the no_cycle_check property on construction. + bool MSkipCycleChecks = false; + /// Unique set of SYCL Memory Objects which are currently in use in the graph. + std::set MMemObjs; + + /// Controls whether we allow buffers to be used in the graph. Set by the + /// presence of the assume_buffer_outlives_graph property. + bool MAllowBuffers = false; }; /// Class representing the implementation of command_graph. @@ -634,6 +723,9 @@ class exec_graph_impl { /// List of requirements for enqueueing this command graph, accumulated from /// all nodes enqueued to the graph. std::vector MRequirements; + /// Storage for accessors which are used by this graph, accumulated from + /// all nodes enqueued to the graph. + std::vector MAccessors; /// List of all execution events returned from command buffer enqueue calls. std::vector MExecutionEvents; }; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 966b82387db1a..4f3e1fabe7dfd 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -160,7 +160,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, #endif // If we have a command graph set we need to capture the copy through normal // queue submission rather than execute the copy directly. - if (MGraph) { + if (MGraph.lock()) { return submit( [&](handler &CGH) { CGH.depends_on(DepEvents); @@ -495,7 +495,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); #endif - if (MGraph) { + if (MGraph.lock()) { throw sycl::exception(make_error_code(errc::invalid), "wait cannot be called for a queue which is " "recording to a command graph."); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index ede48b4d574df..71b4a84d1249c 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -690,7 +690,7 @@ class queue_impl { std::shared_ptr getCommandGraph() const { - return MGraph; + return MGraph.lock(); } protected: @@ -866,8 +866,7 @@ class queue_impl { // Command graph which is associated with this queue for the purposes of // recording commands to it. - std::shared_ptr MGraph = - nullptr; + std::weak_ptr MGraph{}; friend class sycl::ext::oneapi::experimental::detail::node_impl; }; diff --git a/sycl/source/detail/sycl_mem_obj_t.hpp b/sycl/source/detail/sycl_mem_obj_t.hpp index fd74b1bc99dc5..967149230bc74 100644 --- a/sycl/source/detail/sycl_mem_obj_t.hpp +++ b/sycl/source/detail/sycl_mem_obj_t.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -276,6 +277,32 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { void markAsInternal() { MIsInternal = true; } + /// Returns true if this memory object requires a write_back on destruction. + bool needsWriteBack() const { return MNeedWriteBack && MUploadDataFunctor; } + + /// Increment an internal counter for how many graphs are currently using this + /// memory object. + void markBeingUsedInGraph() { MGraphUseCount += 1; } + + /// Decrement an internal counter for how many graphs are currently using this + /// memory object. + void markNoLongerBeingUsedInGraph() { + // Compare exchange loop to safely decrement MGraphUseCount + while (true) { + size_t CurrentVal = MGraphUseCount; + if (CurrentVal == 0) { + break; + } + if (MGraphUseCount.compare_exchange_strong(CurrentVal, CurrentVal - 1) == + false) { + continue; + } + } + } + + /// Returns true if any graphs are currently using this memory object. + bool isUsedInGraph() const { return MGraphUseCount > 0; } + protected: // An allocateMem helper that determines which host ptr to use void determineHostPtr(const ContextImplPtr &Context, bool InitFromUserData, @@ -320,6 +347,8 @@ class __SYCL_EXPORT SYCLMemObjT : public SYCLMemObjI { // objects can be released in a deferred manner regardless of whether a host // pointer was provided or not. bool MIsInternal = false; + // The number of graphs which are currently using this memory object. + std::atomic MGraphUseCount = 0; }; } // namespace detail } // namespace _V1 diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 92549b363bd48..d96275477ea73 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -502,6 +502,13 @@ void handler::addReduction(const std::shared_ptr &ReduObj) { void handler::associateWithHandlerCommon(detail::AccessorImplPtr AccImpl, int AccTarget) { + if (getCommandGraph() && + static_cast(AccImpl->MSYCLMemObj) + ->needsWriteBack()) { + throw sycl::exception(make_error_code(errc::invalid), + "Accessors to buffers which have write_back enabled " + "are not allowed to be used in command graphs."); + } detail::Requirement *Req = AccImpl.get(); // Add accessor to the list of requirements. if (Req->MAccessRange.size() != 0) diff --git a/sycl/test-e2e/Graph/Explicit/add_node_while_recording.cpp b/sycl/test-e2e/Graph/Explicit/add_node_while_recording.cpp index 2f5376a3a536c..44901e3fb452c 100644 --- a/sycl/test-e2e/Graph/Explicit/add_node_while_recording.cpp +++ b/sycl/test-e2e/Graph/Explicit/add_node_while_recording.cpp @@ -6,9 +6,6 @@ // // CHECK-NOT: LEAK -// Expected Fail as exception not implemented yet -// XFAIL: * - // Tests attempting to add a node to a command_graph while it is being // recorded to by a queue is an error. @@ -30,8 +27,17 @@ int main() { Success = true; } } + assert(Success); - Graph.end_recording(); + Success = false; + try { + Graph.add({}); + } catch (sycl::exception &E) { + auto StdErrc = E.code().value(); + Success = (StdErrc == static_cast(errc::invalid)); + } assert(Success); + + Graph.end_recording(); return 0; } diff --git a/sycl/test-e2e/Graph/Explicit/assume_buffer_outlives_graph_property.cpp b/sycl/test-e2e/Graph/Explicit/assume_buffer_outlives_graph_property.cpp new file mode 100644 index 0000000000000..a7ea5b10974a4 --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/assume_buffer_outlives_graph_property.cpp @@ -0,0 +1,11 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using ZE_DEBUG +// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} +// +// CHECK-NOT: LEAK + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/assume_buffer_outlives_graph_property.cpp" diff --git a/sycl/test-e2e/Graph/Explicit/cycle_error.cpp b/sycl/test-e2e/Graph/Explicit/cycle_error.cpp new file mode 100644 index 0000000000000..2ca29aa67b9cf --- /dev/null +++ b/sycl/test-e2e/Graph/Explicit/cycle_error.cpp @@ -0,0 +1,86 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests that introducing a cycle to the graph will throw when +// property::graph::no_cycle_check is not passed to the graph constructor and +// will not throw when it is. + +#include "../graph_common.hpp" + +void CreateGraphWithCyclesTest(bool DisableCycleChecks) { + + // If we are testing without cycle checks we need to do multiple iterations so + // we can test multiple types of cycle, since introducing a cycle with no + // checks may put the graph into an undefined state. + const size_t Iterations = DisableCycleChecks ? 2 : 1; + + queue Queue; + + property_list Props; + + if (DisableCycleChecks) { + Props = {ext::oneapi::experimental::property::graph::no_cycle_check{}}; + } + + for (size_t i = 0; i < Iterations; i++) { + ext::oneapi::experimental::command_graph Graph{Queue.get_context(), + Queue.get_device(), Props}; + + auto NodeA = Graph.add([&](sycl::handler &CGH) { + CGH.single_task([=]() {}); + }); + auto NodeB = Graph.add([&](sycl::handler &CGH) { + CGH.single_task([=]() {}); + }); + auto NodeC = Graph.add([&](sycl::handler &CGH) { + CGH.single_task([=]() {}); + }); + + // Make normal edges + std::error_code ErrorCode = sycl::make_error_code(sycl::errc::success); + try { + Graph.make_edge(NodeA, NodeB); + Graph.make_edge(NodeB, NodeC); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + + assert(ErrorCode == sycl::errc::success); + + // Introduce cycles to the graph. If we are performing cycle checks we can + // test both cycles, if they are disabled we need to test one per iteration. + if (i == 0 || !DisableCycleChecks) { + ErrorCode = sycl::make_error_code(sycl::errc::success); + try { + Graph.make_edge(NodeC, NodeA); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + + assert(ErrorCode == + (DisableCycleChecks ? sycl::errc::success : sycl::errc::invalid)); + } + + if (i == 1 || !DisableCycleChecks) { + ErrorCode = sycl::make_error_code(sycl::errc::success); + try { + Graph.make_edge(NodeC, NodeB); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + + assert(ErrorCode == + (DisableCycleChecks ? sycl::errc::success : sycl::errc::invalid)); + } + } +} + +int main() { + // Test with cycle checks + CreateGraphWithCyclesTest(false); + // Test without cycle checks + CreateGraphWithCyclesTest(true); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Inputs/assume_buffer_outlives_graph_property.cpp b/sycl/test-e2e/Graph/Inputs/assume_buffer_outlives_graph_property.cpp new file mode 100644 index 0000000000000..415767c7888d9 --- /dev/null +++ b/sycl/test-e2e/Graph/Inputs/assume_buffer_outlives_graph_property.cpp @@ -0,0 +1,52 @@ +// Tests that using a buffer in a graph will throw, unless the +// assume_buffer_outlives_graph property is passed on graph creation. + +#include "../graph_common.hpp" + +int main() { + queue Queue; + + using T = unsigned short; + + buffer Buffer{range<1>{1}}; + Buffer.set_write_back(false); + + // Test with the property + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + std::error_code ErrorCode = make_error_code(sycl::errc::success); + // This should not throw because we have passed the property + try { + add_node(Graph, Queue, [&](handler &CGH) { + auto acc = Buffer.get_access(CGH); + CGH.single_task([=]() {}); + }); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + assert(ErrorCode == sycl::errc::success); + } + + // Test without the property + { + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + std::error_code ErrorCode = make_error_code(sycl::errc::success); + // This should throw because we have not passed the property + try { + add_node(Graph, Queue, [&](handler &CGH) { + auto acc = Buffer.get_access(CGH); + CGH.single_task([=]() {}); + }); + } catch (const sycl::exception &e) { + ErrorCode = e.code(); + } + assert(ErrorCode == sycl::errc::invalid); + } + + return 0; +} diff --git a/sycl/test-e2e/Graph/Inputs/basic_buffer.cpp b/sycl/test-e2e/Graph/Inputs/basic_buffer.cpp index 0d34e3f51a822..e8c6cf8dcb760 100644 --- a/sycl/test-e2e/Graph/Inputs/basic_buffer.cpp +++ b/sycl/test-e2e/Graph/Inputs/basic_buffer.cpp @@ -18,28 +18,32 @@ int main() { calculate_reference_data(Iterations, Size, ReferenceA, ReferenceB, ReferenceC); - { - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; buffer BufferA{DataA.data(), range<1>{DataA.size()}}; BufferA.set_write_back(false); buffer BufferB{DataB.data(), range<1>{DataB.size()}}; BufferB.set_write_back(false); buffer BufferC{DataC.data(), range<1>{DataC.size()}}; BufferC.set_write_back(false); - - // Add commands to graph - add_nodes(Graph, Queue, Size, BufferA, BufferB, BufferC); - - auto GraphExec = Graph.finalize(); - - event Event; - for (unsigned n = 0; n < Iterations; n++) { - Event = Queue.submit([&](handler &CGH) { - CGH.depends_on(Event); - CGH.ext_oneapi_graph(GraphExec); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + // Add commands to graph + add_nodes(Graph, Queue, Size, BufferA, BufferB, BufferC); + + auto GraphExec = Graph.finalize(); + + event Event; + for (unsigned n = 0; n < Iterations; n++) { + Event = Queue.submit([&](handler &CGH) { + CGH.depends_on(Event); + CGH.ext_oneapi_graph(GraphExec); + }); + } + Queue.wait_and_throw(); } - Queue.wait_and_throw(); host_accessor HostAccA(BufferA); host_accessor HostAccB(BufferB); @@ -50,7 +54,6 @@ int main() { assert(ReferenceB[i] == HostAccB[i]); assert(ReferenceC[i] == HostAccC[i]); } - } return 0; } diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy.cpp index 56623b53b2d36..9f092fa44b07a 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy.cpp @@ -26,8 +26,6 @@ int main() { } } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA{DataA}; BufferA.set_write_back(false); buffer BufferB{DataB}; @@ -35,79 +33,86 @@ int main() { buffer BufferC{DataC}; BufferC.set_write_back(false); - // Copy from B to A - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccB, AccA); - }); - - // Read & write A - auto NodeB = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccA[LinID] += ModValue; - }); - }, - NodeA); - - // Read & write B - auto NodeModB = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccB[LinID] += ModValue; - }); - }, - NodeA); - - // memcpy from A to B - auto NodeC = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccA, AccB); - }, - NodeB, NodeModB); - - // Read and write B - auto NodeD = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccB[LinID] += ModValue; - }); - }, - NodeC); - - // Copy from B to C - add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - auto AccC = BufferC.get_access(CGH); - CGH.copy(AccB, AccC); - }, - NodeD); - - auto GraphExec = Graph.finalize(); - - event Event; - for (unsigned n = 0; n < Iterations; n++) { - Event = Queue.submit([&](handler &CGH) { - CGH.depends_on(Event); - CGH.ext_oneapi_graph(GraphExec); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + // Copy from B to A + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccB, AccA); }); + + // Read & write A + auto NodeB = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccA[LinID] += ModValue; + }); + }, + NodeA); + + // Read & write B + auto NodeModB = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccB[LinID] += ModValue; + }); + }, + NodeA); + + // memcpy from A to B + auto NodeC = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccA, AccB); + }, + NodeB, NodeModB); + + // Read and write B + auto NodeD = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccB[LinID] += ModValue; + }); + }, + NodeC); + + // Copy from B to C + add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.copy(AccB, AccC); + }, + NodeD); + + auto GraphExec = Graph.finalize(); + + event Event; + for (unsigned n = 0; n < Iterations; n++) { + Event = Queue.submit([&](handler &CGH) { + CGH.depends_on(Event); + CGH.ext_oneapi_graph(GraphExec); + }); + } + Queue.wait_and_throw(); } - Queue.wait_and_throw(); host_accessor HostAccA(BufferA); host_accessor HostAccB(BufferB); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_2d.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_2d.cpp index 574cf9c84981c..08e1018826273 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_2d.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_2d.cpp @@ -27,8 +27,6 @@ int main() { } } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - // Make the buffers 2D so we can test the rect copy path buffer BufferA{DataA.data(), range<2>(Size, Size)}; BufferA.set_write_back(false); @@ -36,74 +34,80 @@ int main() { BufferB.set_write_back(false); buffer BufferC{DataC.data(), range<2>(Size, Size)}; BufferC.set_write_back(false); - - // Copy from B to A - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccB, AccA); - }); - - // Read & write A - auto NodeB = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.parallel_for(range<2>(Size, Size), - [=](item<2> id) { AccA[id] += ModValue; }); - }, - NodeA); - - // Read & write B - auto NodeModB = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<2>(Size, Size), - [=](item<2> id) { AccB[id] += ModValue; }); - }, - NodeA); - - // memcpy from A to B - auto NodeC = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccA, AccB); - }, - NodeModB); - - // Read and write B - auto NodeD = add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<2>(Size, Size), - [=](item<2> id) { AccB[id] += ModValue; }); - }, - NodeC); - - // Copy from B to C - add_node( - Graph, Queue, - [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - auto AccC = BufferC.get_access(CGH); - CGH.copy(AccB, AccC); - }, - NodeD); - - auto GraphExec = Graph.finalize(); - - event Event; - for (unsigned n = 0; n < Iterations; n++) { - Event = Queue.submit([&](handler &CGH) { - CGH.depends_on(Event); - CGH.ext_oneapi_graph(GraphExec); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + // Copy from B to A + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccB, AccA); }); + + // Read & write A + auto NodeB = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.parallel_for(range<2>(Size, Size), + [=](item<2> id) { AccA[id] += ModValue; }); + }, + NodeA); + + // Read & write B + auto NodeModB = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<2>(Size, Size), + [=](item<2> id) { AccB[id] += ModValue; }); + }, + NodeA); + + // memcpy from A to B + auto NodeC = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccA, AccB); + }, + NodeModB); + + // Read and write B + auto NodeD = add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<2>(Size, Size), + [=](item<2> id) { AccB[id] += ModValue; }); + }, + NodeC); + + // Copy from B to C + add_node( + Graph, Queue, + [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.copy(AccB, AccC); + }, + NodeD); + + auto GraphExec = Graph.finalize(); + + event Event; + for (unsigned n = 0; n < Iterations; n++) { + Event = Queue.submit([&](handler &CGH) { + CGH.depends_on(Event); + CGH.ext_oneapi_graph(GraphExec); + }); + } + Queue.wait_and_throw(); } - Queue.wait_and_throw(); host_accessor HostAccA(BufferA); host_accessor HostAccB(BufferB); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target.cpp index 8a25673ea6645..08488009dede4 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target.cpp @@ -17,18 +17,23 @@ int main() { ReferenceA[i] = DataB[i]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA(DataA.data(), range<1>(Size)); BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.copy(DataB.data(), AccA); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.copy(DataB.data(), AccA); + }); - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } host_accessor HostAccA(BufferA); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_2d.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_2d.cpp index 9d581a9dbe76e..3ce06926f5c7d 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_2d.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_2d.cpp @@ -17,20 +17,24 @@ int main() { ReferenceA[i] = DataB[i]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - // Make the buffers 2D so we can test the rect write path buffer BufferA{DataA.data(), range<2>(Size, Size)}; BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.copy(DataB.data(), AccA); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.copy(DataB.data(), AccA); + }); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } host_accessor HostAccA(BufferA); for (size_t i = 0; i < Size; i++) { diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_offset.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_offset.cpp index 4bde5d8a2fa55..f8fd346c142ff 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_offset.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_host2target_offset.cpp @@ -20,19 +20,24 @@ int main() { ReferenceA[i] = DataB[i - Offset]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA(DataA.data(), range<1>(Size + Offset)); BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH, range<1>(Size), - id<1>(Offset)); - CGH.copy(DataB.data(), AccA); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH, range<1>(Size), + id<1>(Offset)); + CGH.copy(DataB.data(), AccA); + }); - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } host_accessor HostAccA(BufferA); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_offsets.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_offsets.cpp index a52aae9220617..f6323efc1f7e7 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_offsets.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_offsets.cpp @@ -25,24 +25,29 @@ int main() { ReferenceB[j] = DataA[(j - OffsetDst) + OffsetSrc]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA{DataA}; BufferA.set_write_back(false); buffer BufferB{DataB}; BufferB.set_write_back(false); - // Copy from A to B - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access( - CGH, range<1>(Size - OffsetSrc), id<1>(OffsetSrc)); - auto AccB = BufferB.get_access( - CGH, range<1>(Size - OffsetDst), id<1>(OffsetDst)); - CGH.copy(AccA, AccB); - }); - - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + // Copy from A to B + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access( + CGH, range<1>(Size - OffsetSrc), id<1>(OffsetSrc)); + auto AccB = BufferB.get_access( + CGH, range<1>(Size - OffsetDst), id<1>(OffsetDst)); + CGH.copy(AccA, AccB); + }); + + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } host_accessor HostAccA(BufferA); host_accessor HostAccB(BufferB); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host.cpp index 460ecd4ee945f..18b4dafe66862 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host.cpp @@ -18,18 +18,23 @@ int main() { ReferenceB[i] = DataA[i]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA(DataA.data(), range<1>(Size)); BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.copy(AccA, DataB.data()); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.copy(AccA, DataB.data()); + }); - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } for (size_t i = 0; i < Size; i++) { assert(ReferenceA[i] == DataA[i]); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_2d.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_2d.cpp index 69050d2a8a1c6..42af3d0a47f43 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_2d.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_2d.cpp @@ -18,19 +18,24 @@ int main() { ReferenceB[i] = DataA[i]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - // Make the buffers 2D so we can test the rect read path buffer BufferA{DataA.data(), range<2>(Size, Size)}; BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.copy(AccA, DataB.data()); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.copy(AccA, DataB.data()); + }); - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } host_accessor HostAccA(BufferA); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_offset.cpp b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_offset.cpp index dcb6d290b1205..bd9e28ac30964 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_offset.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_copy_target2host_offset.cpp @@ -21,19 +21,24 @@ int main() { ReferenceB[i] = DataB[i]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA(DataA.data(), range<1>(Size)); BufferA.set_write_back(false); - auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access( - CGH, range<1>(Size - Offset), id<1>(Offset)); - CGH.copy(AccA, DataB.data()); - }); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + auto NodeA = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access( + CGH, range<1>(Size - Offset), id<1>(Offset)); + CGH.copy(AccA, DataB.data()); + }); - auto GraphExec = Graph.finalize(); - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + auto GraphExec = Graph.finalize(); + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }).wait(); + } for (size_t i = 0; i < Size; i++) { assert(ReferenceA[i] == DataA[i]); diff --git a/sycl/test-e2e/Graph/Inputs/buffer_ordering.cpp b/sycl/test-e2e/Graph/Inputs/buffer_ordering.cpp index 79305c69db52c..a51295583699d 100644 --- a/sycl/test-e2e/Graph/Inputs/buffer_ordering.cpp +++ b/sycl/test-e2e/Graph/Inputs/buffer_ordering.cpp @@ -15,7 +15,10 @@ int main() { queue Queue; - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; const size_t N = 10; std::vector Arr(N, 0.0f); diff --git a/sycl/test-e2e/Graph/Inputs/dotp_buffer_reduction.cpp b/sycl/test-e2e/Graph/Inputs/dotp_buffer_reduction.cpp index 9e64a0bdae5d8..eac37ec354376 100644 --- a/sycl/test-e2e/Graph/Inputs/dotp_buffer_reduction.cpp +++ b/sycl/test-e2e/Graph/Inputs/dotp_buffer_reduction.cpp @@ -6,8 +6,6 @@ int main() { queue Queue; - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - float DotpData = 0.f; const size_t N = 10; @@ -15,16 +13,20 @@ int main() { std::vector YData(N); std::vector ZData(N); - { - buffer DotpBuf(&DotpData, range<1>(1)); - DotpBuf.set_write_back(false); + buffer DotpBuf(&DotpData, range<1>(1)); + DotpBuf.set_write_back(false); - buffer XBuf(XData); - XBuf.set_write_back(false); - buffer YBuf(YData); - YBuf.set_write_back(false); - buffer ZBuf(ZData); - ZBuf.set_write_back(false); + buffer XBuf(XData); + XBuf.set_write_back(false); + buffer YBuf(YData); + YBuf.set_write_back(false); + buffer ZBuf(ZData); + ZBuf.set_write_back(false); + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; auto NodeI = add_node(Graph, Queue, [&](handler &CGH) { auto X = XBuf.get_access(CGH); @@ -75,10 +77,10 @@ int main() { // Using shortcut for executing a graph of commands Queue.ext_oneapi_graph(ExecGraph).wait(); - - host_accessor HostAcc(DotpBuf); - assert(HostAcc[0] == dotp_reference_result(N)); } + host_accessor HostAcc(DotpBuf); + assert(HostAcc[0] == dotp_reference_result(N)); + return 0; } diff --git a/sycl/test-e2e/Graph/Inputs/event_status_querying.cpp b/sycl/test-e2e/Graph/Inputs/event_status_querying.cpp index 04b5820a895b7..962f68c24488b 100644 --- a/sycl/test-e2e/Graph/Inputs/event_status_querying.cpp +++ b/sycl/test-e2e/Graph/Inputs/event_status_querying.cpp @@ -55,8 +55,6 @@ int main() { ReferenceC[j] = ReferenceB[j]; } - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA{DataA}; BufferA.set_write_back(false); buffer BufferB{DataB}; @@ -64,67 +62,75 @@ int main() { buffer BufferC{DataC}; BufferC.set_write_back(false); - // Copy from B to A - auto Init = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccB, AccA); - }); - - // Read & write A - auto Node1 = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccA[LinID] += ModValue; + { + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; + + // Copy from B to A + auto Init = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccB, AccA); }); - }); - - // Read & write B - auto Node2 = add_node(Graph, Queue, [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccB[LinID] += ModValue; + + // Read & write A + auto Node1 = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccA[LinID] += ModValue; + }); }); - }); - - // memcpy from A to B - auto Node3 = add_node(Graph, Queue, [&](handler &CGH) { - auto AccA = BufferA.get_access(CGH); - auto AccB = BufferB.get_access(CGH); - CGH.copy(AccA, AccB); - }); - - // Read and write B - auto Node4 = add_node(Graph, Queue, [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - CGH.parallel_for(range<1>(Size), [=](item<1> id) { - auto LinID = id.get_linear_id(); - AccB[LinID] += ModValue; + + // Read & write B + auto Node2 = add_node(Graph, Queue, [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccB[LinID] += ModValue; + }); }); - }); - - // Copy from B to C - auto Node5 = add_node(Graph, Queue, [&](handler &CGH) { - auto AccB = BufferB.get_access(CGH); - auto AccC = BufferC.get_access(CGH); - CGH.copy(AccB, AccC); - }); - - auto GraphExec = Graph.finalize(); - - sycl::event Event = - Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); - auto Info = Event.get_info(); - std::cout << event_status_name(Info) << std::endl; - while ( - (Info = Event.get_info()) != - sycl::info::event_command_status::complete) { - } - std::cout << event_status_name(Info) << std::endl; - Queue.wait_and_throw(); + // memcpy from A to B + auto Node3 = add_node(Graph, Queue, [&](handler &CGH) { + auto AccA = BufferA.get_access(CGH); + auto AccB = BufferB.get_access(CGH); + CGH.copy(AccA, AccB); + }); + + // Read and write B + auto Node4 = add_node(Graph, Queue, [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + CGH.parallel_for(range<1>(Size), [=](item<1> id) { + auto LinID = id.get_linear_id(); + AccB[LinID] += ModValue; + }); + }); + + // Copy from B to C + auto Node5 = add_node(Graph, Queue, [&](handler &CGH) { + auto AccB = BufferB.get_access(CGH); + auto AccC = BufferC.get_access(CGH); + CGH.copy(AccB, AccC); + }); + + auto GraphExec = Graph.finalize(); + + sycl::event Event = + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + auto Info = Event.get_info(); + std::cout << event_status_name(Info) << std::endl; + while ( + (Info = + Event.get_info()) != + sycl::info::event_command_status::complete) { + } + std::cout << event_status_name(Info) << std::endl; + + Queue.wait_and_throw(); + } host_accessor HostAccA(BufferA); host_accessor HostAccB(BufferB); diff --git a/sycl/test-e2e/Graph/Inputs/temp_buffer_reinterpret.cpp b/sycl/test-e2e/Graph/Inputs/temp_buffer_reinterpret.cpp index 97098dd22e191..a47165c38f104 100644 --- a/sycl/test-e2e/Graph/Inputs/temp_buffer_reinterpret.cpp +++ b/sycl/test-e2e/Graph/Inputs/temp_buffer_reinterpret.cpp @@ -19,12 +19,18 @@ int main() { std::vector ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC); calculate_reference_data(Iterations, Size, ReferenceA, ReferenceB, ReferenceC); + buffer BufferA{DataA.data(), range<1>{DataA.size()}}; + BufferA.set_write_back(false); + buffer BufferB{DataB.data(), range<1>{DataB.size()}}; + BufferB.set_write_back(false); + buffer BufferC{DataC.data(), range<1>{DataC.size()}}; + BufferC.set_write_back(false); { - exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; - buffer BufferA{DataA.data(), range<1>{DataA.size()}}; - buffer BufferB{DataB.data(), range<1>{DataB.size()}}; - buffer BufferC{DataC.data(), range<1>{DataC.size()}}; + exp_ext::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {exp_ext::property::graph::assume_buffer_outlives_graph{}}}; { // Create some temporary buffers only for adding nodes @@ -43,6 +49,10 @@ int main() { CGH.ext_oneapi_graph(GraphExec); }); } + + Queue.copy(BufferA.get_access(), DataA.data()); + Queue.copy(BufferB.get_access(), DataB.data()); + Queue.copy(BufferC.get_access(), DataC.data()); // Perform a wait on all graph submissions. Queue.wait_and_throw(); } diff --git a/sycl/test-e2e/Graph/RecordReplay/assume_buffer_outlives_graph_property.cpp b/sycl/test-e2e/Graph/RecordReplay/assume_buffer_outlives_graph_property.cpp new file mode 100644 index 0000000000000..1a9f955d95739 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/assume_buffer_outlives_graph_property.cpp @@ -0,0 +1,11 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using ZE_DEBUG +// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} +// +// CHECK-NOT: LEAK + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/assume_buffer_outlives_graph_property.cpp" diff --git a/sycl/test-e2e/Graph/device_query.cpp b/sycl/test-e2e/Graph/device_query.cpp index fe778fadd7519..cd37707f8a8c2 100644 --- a/sycl/test-e2e/Graph/device_query.cpp +++ b/sycl/test-e2e/Graph/device_query.cpp @@ -11,13 +11,13 @@ int main() { auto Device = Queue.get_device(); - exp_ext::info::graph_support_level SupportsGraphs = + exp_ext::graph_support_level SupportsGraphs = Device.get_info(); auto Backend = Device.get_backend(); if (Backend == backend::ext_oneapi_level_zero) { - assert(SupportsGraphs == exp_ext::info::graph_support_level::native); + assert(SupportsGraphs == exp_ext::graph_support_level::native); } else { - assert(SupportsGraphs == exp_ext::info::graph_support_level::unsupported); + assert(SupportsGraphs == exp_ext::graph_support_level::unsupported); } } diff --git a/sycl/test-e2e/Graph/exception_unsupported_backend.cpp b/sycl/test-e2e/Graph/exception_unsupported_backend.cpp new file mode 100644 index 0000000000000..9453f35fa9b3d --- /dev/null +++ b/sycl/test-e2e/Graph/exception_unsupported_backend.cpp @@ -0,0 +1,35 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// Tests the ability to finalize a empty command graph +// The test checks that invalid exception is thrown +// when trying to create a graph with an unsupported backend. + +#include "graph_common.hpp" + +int GetUnsupportedBackend(const sycl::device &Dev) { + // Return 1 if the device backend is unsupported or 0 else. + // 0 does not prevent another device to be picked as a second choice + return Dev.get_info< + ext::oneapi::experimental::info::device::graph_support>() == + ext::oneapi::experimental::graph_support_level::unsupported; +} + +int main() { + sycl::device Dev{GetUnsupportedBackend}; + queue Queue{Dev}; + + if (Dev.get_info() != + ext::oneapi::experimental::graph_support_level::unsupported) + return 0; + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + exp_ext::command_graph Graph{Queue.get_context(), Dev}; + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 9bca7079e413d..c608c1cefeb69 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4385,6 +4385,7 @@ _ZNK4sycl3_V16detail16AccessorBaseHost13isPlaceholderEv _ZNK4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv _ZNK4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv _ZNK4sycl3_V16detail16AccessorBaseHost15getMemoryObjectEv +_ZNK4sycl3_V16detail16AccessorBaseHost25isMemoryObjectUsedByGraphEv _ZNK4sycl3_V16detail16AccessorBaseHost6getPtrEv _ZNK4sycl3_V16detail16AccessorBaseHost9getOffsetEv _ZNK4sycl3_V16detail18device_image_plain10has_kernelERKNS0_9kernel_idE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 71b93150dbe56..22b98310b87d2 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -99,8 +99,8 @@ ??$get_info@Ugpu_slices@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ ??$get_info@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Ugpu_subslices_per_slice@device@info@intel@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBAIXZ -??$get_info@Ugraph_support@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@QEBA?AW4graph_support_level@info@experimental@oneapi@ext@12@XZ -??$get_info@Ugraph_support@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AW4graph_support_level@info@experimental@oneapi@ext@23@XZ +??$get_info@Ugraph_support@device@info@experimental@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@QEBA?AW4graph_support_level@experimental@oneapi@ext@12@XZ +??$get_info@Ugraph_support@device@info@experimental@oneapi@ext@_V1@sycl@@@device_impl@detail@_V1@sycl@@QEBA?AW4graph_support_level@experimental@oneapi@ext@23@XZ ??$get_info@Uhalf_fp_config@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AV?$vector@W4fp_config@info@_V1@sycl@@V?$allocator@W4fp_config@info@_V1@sycl@@@std@@@std@@XZ ??$get_info@Uhost_unified_memory@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_NXZ ??$get_info@Uimage2d_max_height@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_KXZ @@ -1306,10 +1306,12 @@ ?isHostPointerReadOnly@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ ?isImageOrImageArray@handler@_V1@sycl@@CA_NW4target@access@23@@Z ?isInterop@SYCLMemObjT@detail@_V1@sycl@@UEBA_NXZ +?isMemoryObjectUsedByGraph@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isOutOfRange@detail@_V1@sycl@@YA_NV?$vec@H$03@23@W4addressing_mode@23@V?$range@$02@23@@Z ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?isPlaceholder@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isStateExplicitKernelBundle@handler@_V1@sycl@@AEBA_NXZ +?isUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?isValidModeForDestinationAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidModeForSourceAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidTargetForExplicitOp@handler@_V1@sycl@@CA_NW4target@access@23@@Z @@ -1371,7 +1373,9 @@ ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?markAsInternal@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ +?markBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?markBufferAsInternal@detail@_V1@sycl@@YAXAEBV?$shared_ptr@Vbuffer_impl@detail@_V1@sycl@@@std@@@Z +?markNoLongerBeingUsedInGraph@SYCLMemObjT@detail@_V1@sycl@@QEAAXXZ ?mem_advise@handler@_V1@sycl@@QEAAXPEBX_KH@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBUcode_location@detail@23@@Z ?mem_advise@queue@_V1@sycl@@QEAA?AVevent@23@PEBX_KHAEBV?$vector@Vevent@_V1@sycl@@V?$allocator@Vevent@_V1@sycl@@@std@@@std@@AEBUcode_location@detail@23@@Z @@ -1396,6 +1400,7 @@ ?message@SYCLCategory@detail@_V1@sycl@@UEBA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@H@Z ?name@SYCLCategory@detail@_V1@sycl@@UEBAPEBDXZ ?native_specialization_constant@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ +?needsWriteBack@SYCLMemObjT@detail@_V1@sycl@@QEBA_NXZ ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$00@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$01@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$02@23@Vkernel@23@@Z diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp index 4d986d0146887..7568c341a84d2 100644 --- a/sycl/unittests/Extensions/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph.cpp @@ -554,7 +554,11 @@ class CommandGraphTest : public ::testing::Test { public: CommandGraphTest() : Mock{}, Plat{Mock.getPlatform()}, Dev{Plat.get_devices()[0]}, - Queue{Dev}, Graph{Queue.get_context(), Dev} {} + Queue{Dev}, + Graph{Queue.get_context(), + Dev, + {experimental::property::graph::assume_buffer_outlives_graph{}}} { + } protected: void SetUp() override {} @@ -1571,6 +1575,164 @@ TEST_F(CommandGraphTest, GetProfilingInfoExceptionCheck) { ASSERT_EQ(ExceptionCode, sycl::errc::invalid); } +TEST_F(CommandGraphTest, MakeEdgeErrors) { + // Set up some nodes in the graph + auto NodeA = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + auto NodeB = Graph.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Test error on calling make_edge when a queue is recording to the graph + Graph.begin_recording(Queue); + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeB); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + Graph.end_recording(Queue); + + // Test error on Src and Dest being the same + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeA); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Test Src or Dest not being found in the graph + experimental::command_graph GraphOther{ + Queue.get_context(), Queue.get_device()}; + auto NodeOther = GraphOther.add( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + ASSERT_THROW( + { + try { + Graph.make_edge(NodeA, NodeOther); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + ASSERT_THROW( + { + try { + Graph.make_edge(NodeOther, NodeB); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Test that adding a cycle with cycle checks leaves the graph in the correct + // state. + + auto CheckGraphStructure = [&]() { + auto GraphImpl = sycl::detail::getSyclObjImpl(Graph); + auto NodeAImpl = sycl::detail::getSyclObjImpl(NodeA); + auto NodeBImpl = sycl::detail::getSyclObjImpl(NodeB); + + ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); + ASSERT_EQ(*(GraphImpl->MRoots.begin()), NodeAImpl); + + ASSERT_EQ(NodeAImpl->MSuccessors.size(), 1lu); + ASSERT_EQ(NodeAImpl->MPredecessors.size(), 0lu); + ASSERT_EQ(NodeAImpl->MSuccessors.front(), NodeBImpl); + + ASSERT_EQ(NodeBImpl->MSuccessors.size(), 0lu); + ASSERT_EQ(NodeBImpl->MPredecessors.size(), 1lu); + ASSERT_EQ(NodeBImpl->MPredecessors.front().lock(), NodeAImpl); + }; + // Make a normal edge + ASSERT_NO_THROW(Graph.make_edge(NodeA, NodeB)); + + // Check the expected structure of the graph + CheckGraphStructure(); + + // Introduce a cycle, make sure it throws + ASSERT_THROW( + { + try { + Graph.make_edge(NodeB, NodeA); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + + // Re-check graph structure to make sure the graph state has not been modified + CheckGraphStructure(); +} + +TEST_F(CommandGraphTest, InvalidBuffer) { + // Check that using a buffer with write_back enabled in a graph will throw. + int Data; + // Create a buffer which does not have write-back disabled. + buffer Buffer{&Data, range<1>{1}}; + + // Use this buffer in the graph, this should throw. + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +TEST_F(CommandGraphTest, InvalidHostAccessor) { + // Check that creating a host_accessor on a buffer which is in use by a graph + // will throw. + + // Create a buffer which does not have write-back disabled. + buffer Buffer{range<1>{1}}; + + { + // Create a graph in local scope so we can destroy it + ext::oneapi::experimental::command_graph Graph{ + Queue.get_context(), + Queue.get_device(), + {experimental::property::graph::assume_buffer_outlives_graph{}}}; + + // Add the buffer to the graph. + Graph.add([&](handler &CGH) { + auto Acc = Buffer.get_access(CGH); + }); + + // Attempt to create a host_accessor, which should throw. + ASSERT_THROW( + { + try { + host_accessor HostAcc{Buffer}; + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + } + // Graph is now out of scope so we should be able to create a host_accessor + ASSERT_NO_THROW({ host_accessor HostAcc{Buffer}; }); +} + class MultiThreadGraphTest : public CommandGraphTest { public: MultiThreadGraphTest()