diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp index 80837a48f07d7..de41a6b28b781 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp @@ -31,6 +31,44 @@ namespace oneapi { namespace experimental { namespace detail { +// List of sycl features and extensions which are not supported by graphs. Used +// for throwing errors when these features are used with graphs. +enum class UnsupportedGraphFeatures { + sycl_reductions = 0, + sycl_specialization_constants = 1, + sycl_kernel_bundle = 2, + sycl_ext_oneapi_kernel_properties = 3, + sycl_ext_oneapi_enqueue_barrier = 4, + sycl_ext_oneapi_memcpy2d = 5, + sycl_ext_oneapi_device_global = 6, + sycl_ext_oneapi_bindless_images = 7 +}; + +constexpr const char * +UnsupportedFeatureToString(UnsupportedGraphFeatures Feature) { + using UGF = UnsupportedGraphFeatures; + switch (Feature) { + case UGF::sycl_reductions: + return "Reductions"; + case UGF::sycl_specialization_constants: + return "Specialization Constants"; + case UGF::sycl_kernel_bundle: + return "Kernel Bundles"; + case UGF::sycl_ext_oneapi_kernel_properties: + return "sycl_ext_oneapi_kernel_properties"; + case UGF::sycl_ext_oneapi_enqueue_barrier: + return "sycl_ext_oneapi_enqueue_barrier"; + case UGF::sycl_ext_oneapi_memcpy2d: + return "sycl_ext_oneapi_memcpy2d"; + case UGF::sycl_ext_oneapi_device_global: + return "sycl_ext_oneapi_device_global"; + case UGF::sycl_ext_oneapi_bindless_images: + return "sycl_ext_oneapi_bindless_images"; + default: + return {}; + } +} + class node_impl; class graph_impl; class exec_graph_impl; diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 33cfcdc70057d..9d5a50c876ac3 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -135,7 +135,7 @@ class pipe; namespace ext::oneapi::experimental::detail { class graph_impl; -} +} // namespace ext::oneapi::experimental::detail namespace detail { class handler_impl; @@ -1578,6 +1578,10 @@ class __SYCL_EXPORT handler { void set_specialization_constant( typename std::remove_reference_t::value_type Value) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_specialization_constants>(); + setStateSpecConstSet(); std::shared_ptr KernelBundleImplPtr = @@ -1592,6 +1596,10 @@ class __SYCL_EXPORT handler { typename std::remove_reference_t::value_type get_specialization_constant() const { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_specialization_constants>(); + if (isStateExplicitKernelBundle()) throw sycl::exception(make_error_code(errc::invalid), "Specialization constants cannot be read after " @@ -2107,6 +2115,7 @@ class __SYCL_EXPORT handler { std::enable_if_t< ext::oneapi::experimental::is_property_list::value> single_task(PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); single_task_lambda_impl(Props, KernelFunc); } @@ -2117,6 +2126,7 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::is_property_list::value> parallel_for(range<1> NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } @@ -2127,6 +2137,7 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::is_property_list::value> parallel_for(range<2> NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } @@ -2137,6 +2148,7 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::is_property_list::value> parallel_for(range<3> NumWorkItems, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_lambda_impl( NumWorkItems, Props, std::move(KernelFunc)); } @@ -2147,6 +2159,7 @@ class __SYCL_EXPORT handler { ext::oneapi::experimental::is_property_list::value> parallel_for(nd_range Range, PropertiesT Properties, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_impl(Range, Properties, std::move(KernelFunc)); } @@ -2159,6 +2172,9 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<1> Range, PropertiesT Properties, RestT &&...Rest) { + throwIfGraphAssociated(); + throwIfGraphAssociatedAndKernelProperties(); detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2170,6 +2186,9 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<2> Range, PropertiesT Properties, RestT &&...Rest) { + throwIfGraphAssociated(); + throwIfGraphAssociatedAndKernelProperties(); detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2181,6 +2200,9 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(range<3> Range, PropertiesT Properties, RestT &&...Rest) { + throwIfGraphAssociated(); + throwIfGraphAssociatedAndKernelProperties(); detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2216,6 +2238,8 @@ class __SYCL_EXPORT handler { detail::AreAllButLastReductions::value && ext::oneapi::experimental::is_property_list::value> parallel_for(nd_range Range, PropertiesT Properties, RestT &&...Rest) { + throwIfGraphAssociated(); detail::reduction_parallel_for(*this, Range, Properties, std::forward(Rest)...); } @@ -2235,6 +2259,7 @@ class __SYCL_EXPORT handler { int Dims, typename PropertiesT> void parallel_for_work_group(range NumWorkGroups, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_work_group_lambda_impl(NumWorkGroups, Props, KernelFunc); @@ -2245,6 +2270,7 @@ class __SYCL_EXPORT handler { void parallel_for_work_group(range NumWorkGroups, range WorkGroupSize, PropertiesT Props, _KERNELFUNCPARAM(KernelFunc)) { + throwIfGraphAssociatedAndKernelProperties(); parallel_for_work_group_lambda_impl( NumWorkGroups, WorkGroupSize, Props, KernelFunc); @@ -2552,6 +2578,9 @@ class __SYCL_EXPORT handler { /// until all commands previously submitted to this queue have entered the /// complete state. void ext_oneapi_barrier() { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_enqueue_barrier>(); throwIfActionIsCreated(); setType(detail::CG::Barrier); } @@ -2637,6 +2666,9 @@ class __SYCL_EXPORT handler { typename = std::enable_if_t>> void ext_oneapi_memcpy2d(void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, size_t Width, size_t Height) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_memcpy2d>(); throwIfActionIsCreated(); if (Width > DestPitch) throw sycl::exception(sycl::make_error_code(errc::invalid), @@ -2815,6 +2847,9 @@ class __SYCL_EXPORT handler { void memcpy(ext::oneapi::experimental::device_global &Dest, const void *Src, size_t NumBytes = sizeof(T), size_t DestOffset = 0) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_device_global>(); if (sizeof(T) < DestOffset + NumBytes) throw sycl::exception(make_error_code(errc::invalid), "Copy to device_global is out of bounds."); @@ -2847,6 +2882,9 @@ class __SYCL_EXPORT handler { memcpy(void *Dest, const ext::oneapi::experimental::device_global &Src, size_t NumBytes = sizeof(T), size_t SrcOffset = 0) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_device_global>(); if (sizeof(T) < SrcOffset + NumBytes) throw sycl::exception(make_error_code(errc::invalid), "Copy from device_global is out of bounds."); @@ -3368,8 +3406,34 @@ class __SYCL_EXPORT handler { "handler::require() before it can be used."); } + template + std::enable_if_t< + ext::oneapi::experimental::is_property_list::value> + throwIfGraphAssociatedAndKernelProperties() const { + if (!std::is_same_v) + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_kernel_properties>(); + } + // Set value of the gpu cache configuration for the kernel. void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig); + + template < + ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT> + void throwIfGraphAssociated() const { + + if (getCommandGraph()) { + std::string FeatureString = + ext::oneapi::experimental::detail::UnsupportedFeatureToString( + FeatureT); + throw sycl::exception(sycl::make_error_code(errc::invalid), + "The " + FeatureString + + " feature is not yet available " + "for use with the SYCL Graph extension."); + } + } }; } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 77c60b9bb7829..c29e109128f35 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1355,7 +1355,8 @@ struct NDRangeReduction< sycl::atomic_ref( NWorkGroupsFinished[0]); - DoReducePartialSumsInLastWG[0] = ++NFinished == NWorkGroups; + DoReducePartialSumsInLastWG[0] = + ++NFinished == static_cast(NWorkGroups); } workGroupBarrier(); diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp index 363fa074c8b92..3232dbf8dbb8f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp @@ -663,6 +663,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) { + // There are issues with immediate command lists so return an error if the + // queue is in that mode. + if (Queue->UsingImmCmdLists) { + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + } + std::scoped_lock lock(Queue->Mutex); // Use compute engine rather than copy engine const auto UseCopyEngine = false; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 586530cbe596e..24edd3d6e9a1e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -278,6 +278,11 @@ void event_impl::checkProfilingPreconditions() const { "Profiling information is unavailable as the queue associated with " "the event does not have the 'enable_profiling' property."); } + if (MEventFromSubmitedExecCommandBuffer) { + throw sycl::exception(make_error_code(sycl::errc::invalid), + "Profiling information is unavailable for events " + "returned by a graph submission."); + } } template <> diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 6565ff092eeac..067218f5a8459 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -282,6 +282,14 @@ class event_impl { return MGraph.lock(); } + void setEventFromSubmitedExecCommandBuffer(bool value) { + MEventFromSubmitedExecCommandBuffer = value; + } + + bool isEventFromSubmitedExecCommandBuffer() const { + return MEventFromSubmitedExecCommandBuffer; + } + protected: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -332,6 +340,8 @@ class event_impl { /// Store the command graph associated with this event, if any. /// This event is also be stored in the graph so a weak_ptr is used. std::weak_ptr MGraph; + /// Indicates that the event results from a command graph submission + bool MEventFromSubmitedExecCommandBuffer = false; // If this event represents a submission to a // sycl::detail::pi::PiExtCommandBuffer the sync point for that submission is diff --git a/sycl/source/detail/fusion/fusion_wrapper_impl.cpp b/sycl/source/detail/fusion/fusion_wrapper_impl.cpp index 492b0bc4aa852..d846b018ab64c 100644 --- a/sycl/source/detail/fusion/fusion_wrapper_impl.cpp +++ b/sycl/source/detail/fusion/fusion_wrapper_impl.cpp @@ -27,6 +27,11 @@ bool fusion_wrapper_impl::is_in_fusion_mode() const { } void fusion_wrapper_impl::start_fusion() { + if (MQueue->getCommandGraph()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "SYCL kernel fusion can NOT be started " + "on a queue that is in a recording state."); + } detail::Scheduler::getInstance().startFusion(MQueue); } diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 713893508a004..30d66ffc12e02 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -372,6 +372,7 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, auto NewEvent = std::make_shared(Queue); NewEvent->setContextImpl(Queue->getContextImplPtr()); NewEvent->setStateIncomplete(); + NewEvent->setEventFromSubmitedExecCommandBuffer(true); return NewEvent; }); @@ -395,7 +396,14 @@ exec_graph_impl::enqueue(const std::shared_ptr &Queue, ->call_nocheck< sycl::detail::PiApiKind::piextEnqueueCommandBuffer>( CommandBuffer, Queue->getHandleRef(), 0, nullptr, OutEvent); - if (Res != pi_result::PI_SUCCESS) { + if (Res == pi_result::PI_ERROR_INVALID_QUEUE_PROPERTIES) { + throw sycl::exception( + make_error_code(errc::invalid), + "Graphs cannot be submitted to a queue which uses " + "immediate command lists. Use " + "sycl::ext::intel::property::queue::no_immediate_" + "command_list to disable them."); + } else if (Res != pi_result::PI_SUCCESS) { throw sycl::exception( errc::event, "Failed to enqueue event for command buffer submission"); @@ -509,6 +517,12 @@ modifiable_command_graph::finalize(const sycl::property_list &) const { bool modifiable_command_graph::begin_recording(queue &RecordingQueue) { auto QueueImpl = sycl::detail::getSyclObjImpl(RecordingQueue); + if (QueueImpl->is_in_fusion_mode()) { + throw sycl::exception(sycl::make_error_code(errc::invalid), + "SYCL queue in kernel in fusion mode " + "can NOT be recorded."); + } + if (QueueImpl->get_context() != impl->getContext()) { throw sycl::exception(sycl::make_error_code(errc::invalid), "begin_recording called for a queue whose context " diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 10e85e01848c3..0c6d422fa83fe 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -795,6 +795,9 @@ void handler::verifyUsedKernelBundle(const std::string &KernelName) { } void handler::ext_oneapi_barrier(const std::vector &WaitList) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_enqueue_barrier>(); throwIfActionIsCreated(); MCGType = detail::CG::BarrierWaitlist; MEventsWaitWithBarrier.resize(WaitList.size()); @@ -889,6 +892,9 @@ void handler::ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value, void handler::ext_oneapi_copy( void *Src, ext::oneapi::experimental::image_mem_handle Dest, const ext::oneapi::experimental::image_descriptor &Desc) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src; MDstPtr = Dest.raw_handle; @@ -922,7 +928,9 @@ void handler::ext_oneapi_copy( ext::oneapi::experimental::image_mem_handle Dest, sycl::range<3> DestOffset, const ext::oneapi::experimental::image_descriptor &DestImgDesc, sycl::range<3> CopyExtent) { - + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src; MDstPtr = Dest.raw_handle; @@ -955,6 +963,9 @@ void handler::ext_oneapi_copy( void handler::ext_oneapi_copy( ext::oneapi::experimental::image_mem_handle Src, void *Dest, const ext::oneapi::experimental::image_descriptor &Desc) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src.raw_handle; MDstPtr = Dest; @@ -988,6 +999,9 @@ void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_descriptor &SrcImgDesc, void *Dest, sycl::range<3> DestOffset, sycl::range<3> DestExtent, sycl::range<3> CopyExtent) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src.raw_handle; MDstPtr = Dest; @@ -1020,6 +1034,9 @@ void handler::ext_oneapi_copy( void handler::ext_oneapi_copy( void *Src, void *Dest, const ext::oneapi::experimental::image_descriptor &Desc, size_t Pitch) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src; MDstPtr = Dest; @@ -1055,6 +1072,9 @@ void handler::ext_oneapi_copy( const ext::oneapi::experimental::image_descriptor &DeviceImgDesc, size_t DeviceRowPitch, sycl::range<3> HostExtent, sycl::range<3> CopyExtent) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MSrcPtr = Src; MDstPtr = Dest; @@ -1088,6 +1108,9 @@ void handler::ext_oneapi_copy( void handler::ext_oneapi_wait_external_semaphore( sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MImpl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; setType(detail::CG::SemaphoreWait); @@ -1095,6 +1118,9 @@ void handler::ext_oneapi_wait_external_semaphore( void handler::ext_oneapi_signal_external_semaphore( sycl::ext::oneapi::experimental::interop_semaphore_handle SemaphoreHandle) { + throwIfGraphAssociated< + ext::oneapi::experimental::detail::UnsupportedGraphFeatures:: + sycl_ext_oneapi_bindless_images>(); MImpl->MInteropSemaphoreHandle = (sycl::detail::pi::PiInteropSemaphoreHandle)SemaphoreHandle.raw_handle; setType(detail::CG::SemaphoreSignal); @@ -1103,6 +1129,9 @@ void handler::ext_oneapi_signal_external_semaphore( void handler::use_kernel_bundle( const kernel_bundle &ExecBundle) { + throwIfGraphAssociated(); + std::shared_ptr PrimaryQueue = MImpl->MSubmissionPrimaryQueue; if (PrimaryQueue->get_context() != ExecBundle.get_context()) diff --git a/sycl/test-e2e/Graph/RecordReplay/dotp_buffer_reduction.cpp b/sycl/test-e2e/Graph/RecordReplay/dotp_buffer_reduction.cpp index af577686832cd..3e72076841306 100644 --- a/sycl/test-e2e/Graph/RecordReplay/dotp_buffer_reduction.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/dotp_buffer_reduction.cpp @@ -6,6 +6,9 @@ // // CHECK-NOT: LEAK +// Expected fail as reduction support is not complete. +// XFAIL: * + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/dotp_buffer_reduction.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/dotp_usm_reduction.cpp b/sycl/test-e2e/Graph/RecordReplay/dotp_usm_reduction.cpp index dab4b34eec79d..6738affa87c13 100644 --- a/sycl/test-e2e/Graph/RecordReplay/dotp_usm_reduction.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/dotp_usm_reduction.cpp @@ -6,6 +6,9 @@ // // CHECK-NOT: LEAK +// Expected fail as reduction support is not complete. +// XFAIL: * + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/dotp_usm_reduction.cpp" diff --git a/sycl/test-e2e/Graph/RecordReplay/sub_graph_reduction.cpp b/sycl/test-e2e/Graph/RecordReplay/sub_graph_reduction.cpp index f3e58b1ef99ff..de9cbead9634d 100644 --- a/sycl/test-e2e/Graph/RecordReplay/sub_graph_reduction.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/sub_graph_reduction.cpp @@ -6,6 +6,9 @@ // // CHECK-NOT: LEAK +// Expected fail as reduction support is not complete. +// XFAIL: * + #define GRAPH_E2E_RECORD_REPLAY #include "../Inputs/sub_graph_reduction.cpp" diff --git a/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp b/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp new file mode 100644 index 0000000000000..e674beec4693f --- /dev/null +++ b/sycl/test-e2e/Graph/graph_exception_global_device_extension.cpp @@ -0,0 +1,150 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// +// The test checks that invalid exception is thrown +// when trying to use sycl_ext_oneapi_device_global +// along with Graph. + +#include "graph_common.hpp" + +using TestProperties = decltype(sycl::ext::oneapi::experimental::properties{}); + +sycl::ext::oneapi::experimental::device_global + MemcpyDeviceGlobal; +sycl::ext::oneapi::experimental::device_global + CopyDeviceGlobal; + +enum OperationPath { Explicit, RecordReplay, Shortcut }; + +template void test() { + queue Q; + int MemcpyWrite = 42, CopyWrite = 24, MemcpyRead = 1, CopyRead = 2; + + exp_ext::command_graph Graph{Q.get_context(), Q.get_device()}; + + if constexpr (PathKind != OperationPath::Explicit) { + Graph.begin_recording(Q); + } + + // Copy from device globals before having written anything. + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + } + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + return CGH.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + }); + } + if constexpr (PathKind == OperationPath::Explicit) { + Graph.add([&](handler &CGH) { + return CGH.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.copy(CopyDeviceGlobal, &CopyRead); + } + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit( + [&](handler &CGH) { return CGH.copy(CopyDeviceGlobal, &CopyRead); }); + } + if constexpr (PathKind == OperationPath::Explicit) { + Graph.add( + [&](handler &CGH) { return CGH.copy(CopyDeviceGlobal, &CopyRead); }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + // Write to device globals and then read their values. + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.memcpy(MemcpyDeviceGlobal, &MemcpyWrite); + } + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + return CGH.memcpy(MemcpyDeviceGlobal, &MemcpyWrite); + }); + } + if constexpr (PathKind == OperationPath::Explicit) { + Graph.add([&](handler &CGH) { + return CGH.memcpy(MemcpyDeviceGlobal, &MemcpyWrite); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.copy(&CopyWrite, CopyDeviceGlobal); + } else if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit( + [&](handler &CGH) { return CGH.copy(&CopyWrite, CopyDeviceGlobal); }); + } else if constexpr (PathKind == OperationPath::Explicit) { + Graph.add( + [&](handler &CGH) { return CGH.copy(&CopyWrite, CopyDeviceGlobal); }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + } else if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + return CGH.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + }); + } else if constexpr (PathKind == OperationPath::Explicit) { + Graph.add([&](handler &CGH) { + return CGH.memcpy(&MemcpyRead, MemcpyDeviceGlobal); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q.copy(CopyDeviceGlobal, &CopyRead); + } else if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit( + [&](handler &CGH) { return CGH.copy(CopyDeviceGlobal, &CopyRead); }); + } else if constexpr (PathKind == OperationPath::Explicit) { + Graph.add( + [&](handler &CGH) { return CGH.copy(CopyDeviceGlobal, &CopyRead); }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + assert(ExceptionCode == sycl::errc::invalid); + + if constexpr (PathKind != OperationPath::Explicit) { + Graph.end_recording(); + } +} + +int main() { + test(); + test(); + test(); + return 0; +} diff --git a/sycl/test-e2e/Graph/immediate_command_list_error.cpp b/sycl/test-e2e/Graph/immediate_command_list_error.cpp new file mode 100644 index 0000000000000..bad3fac48007c --- /dev/null +++ b/sycl/test-e2e/Graph/immediate_command_list_error.cpp @@ -0,0 +1,47 @@ +// REQUIRES: level_zero, gpu +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %if ext_oneapi_level_zero %{env ZE_DEBUG=4 %{run} %t.out 2>&1 | FileCheck %s %} +// +// CHECK-NOT: LEAK + +// Tests that graph submission will throw if the target queue is using immediate +// command lists and not throw if they are using regular command queues. + +#include "graph_common.hpp" + +int main() { + queue QueueImmediate{ + {sycl::ext::intel::property::queue::immediate_command_list{}}}; + queue QueueNoImmediate{ + QueueImmediate.get_context(), + QueueImmediate.get_device(), + {sycl::ext::intel::property::queue::no_immediate_command_list{}}}; + + exp_ext::command_graph Graph{QueueNoImmediate.get_context(), + QueueNoImmediate.get_device()}; + + std::error_code ErrorCode = make_error_code(sycl::errc::success); + try { + auto GraphExec = Graph.finalize(); + QueueNoImmediate.submit( + [&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } catch (sycl::exception &E) { + ErrorCode = E.code(); + } + + assert(ErrorCode == make_error_code(errc::success)); + + ErrorCode = make_error_code(sycl::errc::success); + try { + auto GraphExec = Graph.finalize(); + QueueImmediate.submit( + [&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + } catch (sycl::exception &E) { + ErrorCode = E.code(); + } + + assert(ErrorCode == make_error_code(errc::invalid)); + + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph.cpp index 590d86b8e0019..2a8eb0242b66a 100644 --- a/sycl/unittests/Extensions/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph.cpp @@ -14,12 +14,457 @@ #include #include #include +#include #include using namespace sycl; using namespace sycl::ext::oneapi; +// Spec constant for testing. +constexpr specialization_id SpecConst1{7}; + +namespace sycl { +inline namespace _V1 { +namespace detail { + +// Necessary for get_specialization_constant() to work in unit tests. +template <> const char *get_spec_constant_symbolic_ID() { + return "SC1"; +} +} // namespace detail +} // namespace _V1 +} // namespace sycl + +// anonymous namespace used to avoid code redundancy by defining functions +// used by multiple times by unitests. +// Defining anonymous namespace prevents from function naming conflits +namespace { +/// Define the three possible path to add node to a SYCL Graph. +/// Shortcut is a sub-type of Record&Replay using Queue shortcut +/// instead of standard kernel submitions. +enum OperationPath { Explicit, RecordReplay, Shortcut }; + +/// Function types and classes for testing Kernel with properties extension +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template class ReqPositiveA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +template struct KernelFunctorWithWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::work_group_size}; + } +}; + +/// Tries to add a Parallel_for node with kernel properties to the graph G +/// It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_kernel_properties extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Props Properties associated to the submitted kernel +/// @param KernelFunc pointer to the kernel +template +void addKernelWithProperties( + sycl::ext::oneapi::experimental::detail::modifiable_command_graph &G, + queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + // Test Parallel_for + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +/// Tries to add a Single task node with kernel properties to the graph G +/// It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_kernel_properties extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Props Properties associated to the submitted kernel +/// @param KernelFunc pointer to the kernel +template +void testSingleTaskProperties(experimental::detail::modifiable_command_graph &G, + queue &Q, PropertiesT Props, + KernelType KernelFunc) { + + // Test Single_task + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + G.begin_recording(Q); + Q.submit([&](sycl::handler &CGH) { + CGH.single_task>(Props, + KernelFunc); + }); + G.end_recording(); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](sycl::handler &CGH) { + CGH.single_task>(Props, + KernelFunc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +/// This function groups all the different test cases +/// when adding a Parallel_for node with kernel properties to the graph G +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +template +void testParallelForProperties( + queue &Q, experimental::detail::modifiable_command_graph &G) { + auto Props = ext::oneapi::experimental::properties{ + experimental::work_group_size}; + auto KernelFunction = [](auto) {}; + + KernelFunctorWithWGSizeProp KernelFunctor; + + G.begin_recording(Q); + + addKernelWithProperties(G, Q, Props, KernelFunction); + addKernelWithProperties(G, Q, Props, + KernelFunctor); + + addKernelWithProperties( + G, Q, Props, KernelFunction); + addKernelWithProperties(G, Q, Props, KernelFunctor); + + G.end_recording(); + + addKernelWithProperties( + G, Q, Props, KernelFunction); + addKernelWithProperties(G, Q, Props, KernelFunctor); +} + +/// Tries to enqueue oneapi barrier to the graph G +/// It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_enqueue_barrier extension can not be used +/// along with SYCL Graph. +template void testEnqueueBarrier() { + sycl::context Context; + sycl::queue Q1(Context, sycl::default_selector_v); + + experimental::command_graph Graph1{ + Q1.get_context(), Q1.get_device()}; + + Graph1.add([&](sycl::handler &cgh) {}); + Graph1.add([&](sycl::handler &cgh) {}); + + if constexpr (PathKind != OperationPath::Explicit) { + Graph1.begin_recording(Q1); + } + + // call queue::ext_oneapi_submit_barrier() + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q1.ext_oneapi_submit_barrier(); + } + if constexpr (PathKind == OperationPath::RecordReplay) { + Q1.submit([&](sycl::handler &CGH) { CGH.ext_oneapi_barrier(); }); + } + if constexpr (PathKind == OperationPath::Explicit) { + Graph1.add([&](handler &CGH) { CGH.ext_oneapi_barrier(); }); + } + + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + if constexpr (PathKind != OperationPath::Explicit) { + Graph1.end_recording(); + } + + sycl::queue Q2(Context, sycl::default_selector_v); + sycl::queue Q3(Context, sycl::default_selector_v); + + experimental::command_graph Graph2{ + Q2.get_context(), Q2.get_device()}; + experimental::command_graph Graph3{ + Q3.get_context(), Q3.get_device()}; + + Graph2.begin_recording(Q2); + Graph3.begin_recording(Q3); + + auto Event1 = Q2.submit([&](sycl::handler &cgh) {}); + auto Event2 = Q3.submit([&](sycl::handler &cgh) {}); + + if constexpr (PathKind == OperationPath::Explicit) { + Graph2.end_recording(); + Graph3.end_recording(); + } + + // call handler::barrier(const std::vector &WaitList) + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::Shortcut) { + Q3.ext_oneapi_submit_barrier({Event1, Event2}); + } + if constexpr (PathKind == OperationPath::RecordReplay) { + Q3.submit([&](sycl::handler &CGH) { + CGH.ext_oneapi_barrier({Event1, Event2}); + }); + } + if constexpr (PathKind == OperationPath::Explicit) { + Graph3.add([&](handler &CGH) { + CGH.ext_oneapi_barrier({Event1, Event2}); + }); + } + + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + if constexpr (PathKind != OperationPath::Explicit) { + Graph2.end_recording(); + Graph3.end_recording(); + } +} + +/// Tries to add a memcpy2D node to the graph G +/// It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_memcpy2d extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Dest Pointer to the memory destination +/// @param DestPitch pitch at the destination +/// @param Src Pointer to the memory source +/// @param SrcPitch pitch at the source +/// @param Witdh width of the data to copy +/// @param Height height of the data to copy +template +void addMemcpy2D(experimental::detail::modifiable_command_graph &G, queue &Q, + void *Dest, size_t DestPitch, const void *Src, size_t SrcPitch, + size_t Width, size_t Height) { + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_memcpy2d(Dest, DestPitch, Src, SrcPitch, Width, Height); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +/// Tries to add nodes including images bindless copy instructions +/// to the graph G. It tests that an invalid exception has been thrown +/// Since sycl_ext_oneapi_bindless_images extension can not be used +/// along with SYCL Graph. +/// +/// @param G Modifiable graph to add commands to. +/// @param Q Queue to submit nodes to. +/// @param Img Image memory +/// @param HostData Host Pointer to the memory +/// @param ImgUSM USM Pointer to Image memory +/// @param Pitch image pitch +/// @param Desc Image descriptor +template +void addImagesCopies(experimental::detail::modifiable_command_graph &G, + queue &Q, sycl::ext::oneapi::experimental::image_mem Img, + std::vector HostData, void *ImgUSM, + size_t Pitch, + sycl::ext::oneapi::experimental::image_descriptor Desc) { + // simple copy Host to Device + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), Img.get_handle(), Desc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // simple copy Device to Host + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), HostData.data(), Desc); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // simple copy Host to Device USM + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), ImgUSM, Desc, Pitch); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Host to Device + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, + Img.get_handle(), {0, 0, 0}, Desc, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, Img.get_handle(), + {0, 0, 0}, Desc, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, {0, 0, 0}, + Img.get_handle(), {0, 0, 0}, Desc, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Device to Host + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(Img.get_handle(), {0, 0, 0}, Desc, HostData.data(), + {0, 0, 0}, {0, 0, 0}, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + // subregion copy Host to Device USM + ExceptionCode = make_error_code(sycl::errc::success); + try { + if constexpr (PathKind == OperationPath::RecordReplay) { + Q.submit([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + }); + } + if constexpr (PathKind == OperationPath::Shortcut) { + Q.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + } + if constexpr (PathKind == OperationPath::Explicit) { + G.add([&](handler &CGH) { + CGH.ext_oneapi_copy(HostData.data(), {0, 0, 0}, ImgUSM, {0, 0, 0}, Desc, + Pitch, {0, 0, 0}, {0, 0, 0}); + }); + } + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} +} // anonymous namespace + class CommandGraphTest : public ::testing::Test { public: CommandGraphTest() @@ -57,7 +502,7 @@ TEST_F(CommandGraphTest, AddNode) { ASSERT_TRUE(GraphImpl->MRoots.empty()); auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); ASSERT_NE(sycl::detail::getSyclObjImpl(Node1), nullptr); ASSERT_FALSE(sycl::detail::getSyclObjImpl(Node1)->isEmpty()); ASSERT_EQ(GraphImpl->MRoots.size(), 1lu); @@ -112,17 +557,17 @@ TEST_F(CommandGraphTest, Finalize) { sycl::buffer Buf(1); auto Node1 = Graph.add([&](sycl::handler &cgh) { sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); - cgh.single_task([=]() { A[0] = 1; }); + cgh.single_task>([]() {}); }); // Add independent node auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); // Add a node that depends on Node1 due to the accessor auto Node3 = Graph.add([&](sycl::handler &cgh) { sycl::accessor A(Buf, cgh, sycl::write_only, sycl::no_init); - cgh.single_task([=]() { A[0] = 3; }); + cgh.single_task>([]() {}); }); // Guarantee order of independent nodes 1 and 2 @@ -148,7 +593,7 @@ TEST_F(CommandGraphTest, MakeEdge) { // Add two independent nodes auto Node1 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto Node2 = Graph.add([&](sycl::handler &cgh) {}); ASSERT_EQ(GraphImpl->MRoots.size(), 2ul); ASSERT_TRUE(sycl::detail::getSyclObjImpl(Node1)->MSuccessors.empty()); @@ -242,7 +687,7 @@ TEST_F(CommandGraphTest, BeginEndRecording) { TEST_F(CommandGraphTest, GetCGCopy) { auto Node1 = Graph.add([&](sycl::handler &cgh) {}); auto Node2 = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, {experimental::property::node::depends_on(Node1)}); // Get copy of CG of Node2 and check equality @@ -264,21 +709,21 @@ TEST_F(CommandGraphTest, GetCGCopy) { TEST_F(CommandGraphTest, SubGraph) { // Add sub-graph with two nodes auto Node1Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto Node2Graph = Graph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, {experimental::property::node::depends_on(Node1Graph)}); auto GraphExec = Graph.finalize(); // Add node to main graph followed by sub-graph and another node experimental::command_graph MainGraph(Queue.get_context(), Dev); auto Node1MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto Node2MainGraph = MainGraph.add([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }, {experimental::property::node::depends_on(Node1MainGraph)}); auto Node3MainGraph = MainGraph.add( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }, + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }, {experimental::property::node::depends_on(Node2MainGraph)}); // Assert order of the added sub-graph @@ -316,10 +761,10 @@ TEST_F(CommandGraphTest, RecordSubGraph) { // Record sub-graph with two nodes Graph.begin_recording(Queue); auto Node1Graph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto Node2Graph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node1Graph); - cgh.single_task([]() {}); + cgh.single_task>([]() {}); }); Graph.end_recording(Queue); auto GraphExec = Graph.finalize(); @@ -328,14 +773,14 @@ TEST_F(CommandGraphTest, RecordSubGraph) { experimental::command_graph MainGraph(Queue.get_context(), Dev); MainGraph.begin_recording(Queue); auto Node1MainGraph = Queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto Node2MainGraph = Queue.submit([&](handler &cgh) { cgh.depends_on(Node1MainGraph); cgh.ext_oneapi_graph(GraphExec); }); auto Node3MainGraph = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(Node2MainGraph); - cgh.single_task([]() {}); + cgh.single_task>([]() {}); }); MainGraph.end_recording(Queue); @@ -385,7 +830,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { // Record in-order queue with three nodes InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -394,7 +839,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -407,7 +852,7 @@ TEST_F(CommandGraphTest, InOrderQueue) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -445,7 +890,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmpty) { // node InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -466,7 +911,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmpty) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -510,7 +955,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -523,7 +968,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyFirst) { ASSERT_EQ(PtrNode2->MPredecessors.front().lock(), PtrNode1); auto Node3Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode3 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -559,7 +1004,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { // Record in-order queue with two regular nodes then an empty node InOrderGraph.begin_recording(InOrderQueue); auto Node1Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode1 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -568,7 +1013,7 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { ASSERT_TRUE(PtrNode1->MPredecessors.empty()); auto Node2Graph = InOrderQueue.submit( - [&](sycl::handler &cgh) { cgh.single_task([]() {}); }); + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); auto PtrNode2 = sycl::detail::getSyclObjImpl(InOrderGraph) @@ -606,3 +1051,272 @@ TEST_F(CommandGraphTest, InOrderQueueWithEmptyLast) { ASSERT_EQ(*ScheduleIt, PtrNode2); ASSERT_EQ(InOrderQueue.get_context(), GraphExecImpl->getContext()); } + +TEST_F(CommandGraphTest, EnqueueBarrierExceptionCheck) { + testEnqueueBarrier(); + testEnqueueBarrier(); + testEnqueueBarrier(); +} + +TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) { + queue Q{ext::codeplay::experimental::property::queue::enable_fusion{}}; + + experimental::command_graph Graph{ + Q.get_context(), Q.get_device()}; + + ext::codeplay::experimental::fusion_wrapper fw{Q}; + + // Test: Start fusion on a queue that is in recording mode + Graph.begin_recording(Q); + + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + fw.start_fusion(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + Graph.end_recording(Q); + + // Test: begin recording a queue in fusion mode + + fw.start_fusion(); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + Graph.begin_recording(Q); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} + +TEST_F(CommandGraphTest, KernelPropertiesExceptionCheck) { + + // Test Parallel for entry point + testParallelForProperties<4>(Queue, Graph); + testParallelForProperties<4, 4>(Queue, Graph); + testParallelForProperties<8, 4>(Queue, Graph); + testParallelForProperties<4, 8>(Queue, Graph); + testParallelForProperties<4, 4, 4>(Queue, Graph); + testParallelForProperties<4, 4, 8>(Queue, Graph); + testParallelForProperties<8, 4, 4>(Queue, Graph); + testParallelForProperties<4, 8, 4>(Queue, Graph); + + // Test Single Task entry point + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::work_group_size<4>}; + auto KernelFunction = [](auto) {}; + testSingleTaskProperties(Graph, Queue, Props, + KernelFunction); + testSingleTaskProperties(Graph, Queue, Props, + KernelFunction); +} + +TEST_F(CommandGraphTest, Memcpy2DExceptionCheck) { + constexpr size_t RECT_WIDTH = 30; + constexpr size_t RECT_HEIGHT = 21; + constexpr size_t SRC_ELEMS = RECT_WIDTH * RECT_HEIGHT; + constexpr size_t DST_ELEMS = SRC_ELEMS; + + using T = int; + + Graph.begin_recording(Queue); + + T *USMMemSrc = malloc_device(SRC_ELEMS, Queue); + T *USMMemDst = malloc_device(DST_ELEMS, Queue); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + Graph.end_recording(); + + addMemcpy2D( + Graph, Queue, USMMemDst, RECT_WIDTH * sizeof(T), USMMemSrc, + RECT_WIDTH * sizeof(T), RECT_WIDTH * sizeof(T), RECT_HEIGHT); + + sycl::free(USMMemSrc, Queue); + sycl::free(USMMemDst, Queue); +} + +// Tests that using specialization constants in a graph will throw. +TEST_F(CommandGraphTest, SpecializationConstant) { + + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + CGH.set_specialization_constant(8); + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + int Value = CGH.get_specialization_constant(); + (void)Value; + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +// Tests that using kernel bundles in a graph will throw. +TEST_F(CommandGraphTest, KernelBundle) { + sycl::kernel_bundle KernelBundle = + sycl::get_kernel_bundle( + Queue.get_context(), {Dev}); + + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { CGH.use_kernel_bundle(KernelBundle); }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +// Tests that using reductions in a graph will throw. +TEST_F(CommandGraphTest, Reductions) { + int ReduVar = 0; + ASSERT_THROW( + { + try { + Graph.add([&](handler &CGH) { + CGH.parallel_for( + range<1>{1}, reduction(&ReduVar, int{0}, sycl::plus<>()), + [=](item<1> idx, auto &Sum) {}); + }); + } catch (const sycl::exception &e) { + ASSERT_EQ(e.code(), make_error_code(sycl::errc::invalid)); + throw; + } + }, + sycl::exception); +} + +TEST_F(CommandGraphTest, BindlessExceptionCheck) { + auto Ctxt = Queue.get_context(); + + // declare image data + size_t Height = 13; + size_t Width = 7; + size_t Depth = 11; + size_t N = Height * Width * Depth; + std::vector DataIn(N); + + // Extension: image descriptor - can use the same for both images + sycl::ext::oneapi::experimental::image_descriptor Desc( + {Width, Height, Depth}, sycl::image_channel_order::rgba, + sycl::image_channel_type::fp32); + + // Extension: allocate memory on device and create the handle + // Input images memory + sycl::ext::oneapi::experimental::image_mem ImgMem(Desc, Dev, Ctxt); + // Extension: returns the device pointer to USM allocated pitched memory + size_t Pitch = 0; + auto ImgMemUSM = sycl::ext::oneapi::experimental::pitched_alloc_device( + &Pitch, Desc, Queue); + + Graph.begin_recording(Queue); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + Graph.end_recording(); + + addImagesCopies(Graph, Queue, ImgMem, DataIn, + ImgMemUSM, Pitch, Desc); + + sycl::free(ImgMemUSM, Ctxt); +} + +TEST_F(CommandGraphTest, GetProfilingInfoExceptionCheck) { + sycl::context Ctx{Dev}; + sycl::queue QueueProfile{ + Ctx, Dev, sycl::property_list{sycl::property::queue::enable_profiling{}}}; + experimental::command_graph + GraphProfile{QueueProfile.get_context(), Dev}; + + GraphProfile.begin_recording(QueueProfile); + auto Event = QueueProfile.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + + // Checks that exception is thrown when get_profile_info is called on "event" + // returned by a queue in recording mode. + std::error_code ExceptionCode = make_error_code(sycl::errc::success); + try { + Event.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + Event.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + Event.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + GraphProfile.end_recording(); + + auto GraphExec = GraphProfile.finalize(); + auto EventSub = QueueProfile.submit( + [&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); }); + + // Checks that exception is thrown when get_profile_info is called on "event" + // returned by a graph submission. + ExceptionCode = make_error_code(sycl::errc::success); + try { + EventSub.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + EventSub.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); + + ExceptionCode = make_error_code(sycl::errc::success); + try { + EventSub.get_profiling_info(); + } catch (exception &Exception) { + ExceptionCode = Exception.code(); + } + ASSERT_EQ(ExceptionCode, sycl::errc::invalid); +} diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 5c1241ca0d49e..c393d57d16d6c 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -164,7 +164,7 @@ inline pi_result mock_piDeviceGetInfo(pi_device device, size_t *param_value_size_ret) { constexpr char MockDeviceName[] = "Mock device"; constexpr char MockSupportedExtensions[] = - "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program"; + "cl_khr_fp64 cl_khr_fp16 cl_khr_il_program ur_exp_command_buffer"; switch (param_name) { case PI_DEVICE_INFO_TYPE: { // Act like any device is a GPU.