From 13c5f3c8c78b48056002d214225cb2813dc7572a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Komar?= Date: Fri, 7 Feb 2025 14:32:27 +0000 Subject: [PATCH 1/4] Add USMMemcpy to v2 command buffer --- source/adapters/level_zero/v2/api.cpp | 11 -------- .../adapters/level_zero/v2/command_buffer.cpp | 27 +++++++++++++++++++ .../level_zero/v2/command_list_manager.cpp | 24 +++++++++++++++++ .../level_zero/v2/command_list_manager.hpp | 5 ++++ .../v2/queue_immediate_in_order.cpp | 18 +++---------- 5 files changed, 59 insertions(+), 26 deletions(-) diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 88fde2cfac..1b8da4acb4 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -239,17 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t urCommandBufferAppendUSMMemcpyExp( - ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, - size_t size, uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, const void *pPattern, size_t patternSize, size_t size, diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index c451d2cdac..41170cd9b7 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -138,6 +138,33 @@ ur_result_t urCommandBufferAppendKernelLaunchExp( return exceptionToResult(std::current_exception()); } +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, + ur_exp_command_buffer_command_handle_t *phCommand) try { + + // the same issue as in urCommandBufferAppendKernelLaunchExp + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = pSyncPointWaitList; + std::ignore = pSyncPoint; + + std::ignore = phCommand; + + UR_CALL(hCommandBuffer->commandListManager.enqueueUSMMemcpy( + false, pDst, pSrc, size, 0, nullptr, nullptr)); + + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + ur_result_t urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp index 09be360631..e7ebd9d08c 100644 --- a/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -102,6 +102,30 @@ ur_result_t ur_command_list_manager::appendKernelLaunch( return UR_RESULT_SUCCESS; } +ur_result_t ur_command_list_manager::enqueueUSMMemcpy( + bool blocking, void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueUSMMemcpy"); + + std::scoped_lock lock(this->Mutex); + + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); + + auto [pWaitEvents, numWaitEvents] = + getWaitListView(phEventWaitList, numEventsInWaitList); + + ZE2UR_CALL(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), pDst, pSrc, size, zeSignalEvent, + numWaitEvents, pWaitEvents)); + + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, (zeCommandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; +} + ze_command_list_handle_t ur_command_list_manager::getZeCommandList() { return zeCommandList.get(); } diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp b/source/adapters/level_zero/v2/command_list_manager.hpp index 60de1363c7..6ed3908e90 100644 --- a/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -47,6 +47,11 @@ struct ur_command_list_manager : public _ur_object { const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + ze_command_list_handle_t getZeCommandList(); wait_list_view getWaitListView(const ur_event_handle_t *phWaitEvents, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 4d66607250..4ffcb8c36e 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -698,21 +698,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( // TODO: parametrize latency tracking with 'blocking' TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy"); - std::scoped_lock lock(this->Mutex); - - auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); - - auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList); - - ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (commandListManager.getZeCommandList(), pDst, pSrc, size, - zeSignalEvent, numWaitEvents, pWaitEvents)); - - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (commandListManager.getZeCommandList(), UINT64_MAX)); - } + UR_CALL(commandListManager.enqueueUSMMemcpy(blocking, pDst, pSrc, size, + numEventsInWaitList, + phEventWaitList, phEvent)); return UR_RESULT_SUCCESS; } From cae94663546d7a70251cf35374f6ac2652d048db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Komar?= Date: Mon, 10 Feb 2025 11:25:28 +0000 Subject: [PATCH 2/4] Add comment explaining engine choice offload --- source/adapters/level_zero/v2/command_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index 41170cd9b7..9eaa1b5375 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -156,7 +156,7 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp( std::ignore = pSyncPoint; std::ignore = phCommand; - + // Responsibility of UMD to offload to copy engine UR_CALL(hCommandBuffer->commandListManager.enqueueUSMMemcpy( false, pDst, pSrc, size, 0, nullptr, nullptr)); From 05e1bff77ec4302147eecd3838999c4363c73332 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Komar?= Date: Mon, 10 Feb 2025 13:42:35 +0000 Subject: [PATCH 3/4] Rename USMMemcpy operation --- source/adapters/level_zero/v2/command_buffer.cpp | 2 +- source/adapters/level_zero/v2/command_list_manager.cpp | 4 ++-- source/adapters/level_zero/v2/command_list_manager.hpp | 2 +- source/adapters/level_zero/v2/queue_immediate_in_order.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index 9eaa1b5375..2c92e54848 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -157,7 +157,7 @@ ur_result_t urCommandBufferAppendUSMMemcpyExp( std::ignore = phCommand; // Responsibility of UMD to offload to copy engine - UR_CALL(hCommandBuffer->commandListManager.enqueueUSMMemcpy( + UR_CALL(hCommandBuffer->commandListManager.appendUSMMemcpy( false, pDst, pSrc, size, 0, nullptr, nullptr)); return UR_RESULT_SUCCESS; diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp index e7ebd9d08c..b57c53344f 100644 --- a/source/adapters/level_zero/v2/command_list_manager.cpp +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -102,11 +102,11 @@ ur_result_t ur_command_list_manager::appendKernelLaunch( return UR_RESULT_SUCCESS; } -ur_result_t ur_command_list_manager::enqueueUSMMemcpy( +ur_result_t ur_command_list_manager::appendUSMMemcpy( bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - TRACK_SCOPE_LATENCY("ur_command_list_manager::enqueueUSMMemcpy"); + TRACK_SCOPE_LATENCY("ur_command_list_manager::appendUSMMemcpy"); std::scoped_lock lock(this->Mutex); diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp b/source/adapters/level_zero/v2/command_list_manager.hpp index 6ed3908e90..f1bbd80d9b 100644 --- a/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -47,7 +47,7 @@ struct ur_command_list_manager : public _ur_object { const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); - ur_result_t enqueueUSMMemcpy(bool blocking, void *pDst, const void *pSrc, + ur_result_t appendUSMMemcpy(bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 4ffcb8c36e..ea87533590 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -698,7 +698,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( // TODO: parametrize latency tracking with 'blocking' TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy"); - UR_CALL(commandListManager.enqueueUSMMemcpy(blocking, pDst, pSrc, size, + UR_CALL(commandListManager.appendUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent)); From 80379ca5e94710516b5f89251b139505ea141a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Komar?= Date: Mon, 10 Feb 2025 15:22:33 +0000 Subject: [PATCH 4/4] Fix formatting --- source/adapters/level_zero/v2/command_list_manager.hpp | 6 +++--- source/adapters/level_zero/v2/queue_immediate_in_order.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp b/source/adapters/level_zero/v2/command_list_manager.hpp index f1bbd80d9b..975a3a792c 100644 --- a/source/adapters/level_zero/v2/command_list_manager.hpp +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -48,9 +48,9 @@ struct ur_command_list_manager : public _ur_object { ur_event_handle_t *phEvent); ur_result_t appendUSMMemcpy(bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent); + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ze_command_list_handle_t getZeCommandList(); diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index ea87533590..94f2f90b4c 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -699,8 +699,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueUSMMemcpy"); UR_CALL(commandListManager.appendUSMMemcpy(blocking, pDst, pSrc, size, - numEventsInWaitList, - phEventWaitList, phEvent)); + numEventsInWaitList, + phEventWaitList, phEvent)); return UR_RESULT_SUCCESS; }