Skip to content

Commit

Permalink
# This is a combination of 4 commits.
Browse files Browse the repository at this point in the history
# This is the 1st commit message:

parent 9824163
author Mikołaj Komar <[email protected]> 1734527193 +0000
committer Mateusz P. Nowak <[email protected]> 1738059437 +0000

Prepare ground for command_buffer in v2

Enforce in order list usage, and add initialization and destruction to buffer

Add initial support of command buffers to adapter v2

Update UR calls handling

Remove unnecessary comment

Move not implemented command buffer commands to previous position

Fix most issues with code

Fix formatting and modify queue_api template

Move command buffer cleanup to destructor

Use cached command lists instead of created ones

Remove unneeded function and change phrasing

Add initial implementation of command list manager

Use list manager instead of custom implementation in queue

Optimize imports

Remove unneeded destructor

Revert "Fix formatting"

This reverts commit 545e577.

# This is the commit message oneapi-src#2:

Move command list close to the command buffer

# This is the commit message oneapi-src#3:

Moved try outside function block

# This is the commit message oneapi-src#4:

Move enqueue generic command list back to queue
  • Loading branch information
Xewar313 authored and mateuszpn committed Jan 28, 2025
1 parent 9824163 commit 4a916cc
Show file tree
Hide file tree
Showing 14 changed files with 529 additions and 106 deletions.
1 change: 1 addition & 0 deletions Testing/Temporary/CTestCostData.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
---
21 changes: 21 additions & 0 deletions ft.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs the Level Zero enqueue conformance tests against the v2 adapter.
#
# Derived from the CTest entry below:
# add_test([=[enqueue-adapter_level_zero_v2]=] "/home/mateuszpn/pr2532/build/bin/test-enqueue" "--gtest_filter=*Level_Zero*")
# set_tests_properties([=[enqueue-adapter_level_zero_v2]=] PROPERTIES ENVIRONMENT
# "UR_ADAPTERS_FORCE_LOAD=\"/home/mateuszpn/pr2532/build/lib/libur_adapter_level_zero_v2.so.0.12.0\""
# "LABELS "conformance;adapter_level_zero_v2"
# WORKING_DIRECTORY "/home/mateuszpn/pr2532/build/test/conformance/enqueue" _BACKTRACE_TRIPLES "/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;22;add_test;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;32;do_add_test;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;67;add_test_adapter;/home/mateuszpn/pr2532/test/conformance/CMakeLists.txt;78;add_conformance_test;/home/mateuszpn/pr2532/test/conformance/enqueue/CMakeLists.txt;6;add_conformance_test_with_kernels_environment;/home/mateuszpn/pr2532/test/conformance/enqueue/CMakeLists.txt;0;")

# Root of the build tree. Defaults to the original location; export
# UR_BUILD_DIR before running to point the script at another build.
UR_BUILD_DIR="${UR_BUILD_DIR:-/home/mateuszpn/pr2532/build}"

# Set environment variable
export UR_ADAPTERS_FORCE_LOAD="${UR_BUILD_DIR}/lib/libur_adapter_level_zero_v2.so.0.12.0"

# Set working directory
#cd /home/mateuszpn/pr2532/build/test/conformance/$1

# Run the test with the specified filter
#/home/mateuszpn/pr2532/build/bin/test-$1 --gtest_filter=*Level_Zero*

# Set working directory; stop here if it does not exist so the test binary is
# never launched from an unexpected location.
cd "${UR_BUILD_DIR}/test/conformance/enqueue" || exit 1

# Run the test with the specified filter (quoted so the shell never expands
# the glob before gtest sees it)
#/home/mateuszpn/pr2532/build/bin/test-enqueue --gtest_filter=urEnqueueMemBufferMapTestWithParam.MapSignalEvent*Level_Zero*
"${UR_BUILD_DIR}/bin/test-enqueue" --gtest_filter='*Level_Zero*'
2 changes: 2 additions & 0 deletions scripts/templates/queue_api.cpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ from templates import helper as th
*
*/

// Do not edit. This file is auto generated from a template: scripts/templates/queue_api.cpp.mako

#include "queue_api.hpp"
#include "ur_util.hpp"

Expand Down
7 changes: 7 additions & 0 deletions scripts/templates/queue_api.hpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ from templates import helper as th
*
*/

// Do not edit. This file is auto generated from a template: scripts/templates/queue_api.hpp.mako

#pragma once

#include <ur_api.h>
#include <ze_api.h>

struct ur_queue_handle_t_ {
virtual ~ur_queue_handle_t_();
Expand All @@ -32,4 +35,8 @@ struct ur_queue_handle_t_ {
%for obj in th.get_queue_related_functions(specs, n, tags):
virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0;
%endfor

virtual ur_result_t
enqueueCommandBuffer(ze_command_list_handle_t, ur_event_handle_t *,
uint32_t, const ur_event_handle_t *) = 0;
};
4 changes: 4 additions & 0 deletions source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp
# v2-only sources
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_manager.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.hpp
Expand All @@ -159,7 +161,9 @@ if(UR_BUILD_ADAPTER_L0_V2)
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.cpp
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.cpp
Expand Down
58 changes: 0 additions & 58 deletions source/adapters/level_zero/v2/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,47 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: performs no work, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. All parameters
// are unused.
ur_result_t
urCommandBufferCreateExp(ur_context_handle_t hContext,
ur_device_handle_t hDevice,
const ur_exp_command_buffer_desc_t *pCommandBufferDesc,
ur_exp_command_buffer_handle_t *phCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: performs no work, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. The handle is
// not touched.
ur_result_t
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: performs no work, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. The handle is
// not touched.
ur_result_t
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: performs no work, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. The handle is
// not touched.
ur_result_t
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: appends nothing, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. None of the
// parameters (including the output pointers) are read or written.
ur_result_t urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel,
uint32_t workDim, const size_t *pGlobalWorkOffset,
const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives,
uint32_t numSyncPointsInWaitList,
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent,
ur_exp_command_buffer_command_handle_t *phCommand) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urCommandBufferAppendUSMMemcpyExp(
ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc,
size_t size, uint32_t numSyncPointsInWaitList,
Expand Down Expand Up @@ -415,14 +374,6 @@ ur_result_t urCommandBufferAppendUSMAdviseExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: enqueues nothing, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. phEvent is not
// written.
ur_result_t urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urCommandBufferRetainCommandExp(
ur_exp_command_buffer_command_handle_t hCommand) {
logger::error("{} function not implemented!", __FUNCTION__);
Expand All @@ -443,15 +394,6 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp(
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

// Placeholder entry point: returns no information, logs an error naming this
// function, and reports UR_RESULT_ERROR_UNSUPPORTED_FEATURE. pPropValue and
// pPropSizeRet are not written.
ur_result_t
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
ur_exp_command_buffer_info_t propName,
size_t propSize, void *pPropValue,
size_t *pPropSizeRet) {
logger::error("{} function not implemented!", __FUNCTION__);
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

ur_result_t urCommandBufferUpdateSignalEventExp(
ur_exp_command_buffer_command_handle_t hCommand,
ur_event_handle_t *phEvent) {
Expand Down
185 changes: 185 additions & 0 deletions source/adapters/level_zero/v2/command_buffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
//===--------- command_buffer.cpp - Level Zero Adapter ---------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "command_buffer.hpp"
#include "../helpers/kernel_helpers.hpp"
#include "../ur_interface_loader.hpp"
#include "logger/ur_logger.hpp"

namespace {

// Confirms that the platform behind `context` reports support for the
// zeCommandListImmediateAppendCommandListsExp extension. When it does not,
// an error is logged and the process is terminated via std::abort().
void checkImmediateAppendSupport(ur_context_handle_t context) {
  if (context->getPlatform()->ZeCommandListImmediateAppendExt.Supported) {
    return;
  }

  logger::error("Adapter v2 is used but "
                "the current driver does not support the "
                "zeCommandListImmediateAppendCommandListsExp entrypoint.");
  std::abort();
}

} // namespace

// Builds a command buffer around an already-acquired command list.
//
// Ownership of `commandList` is transferred to the command list manager.
// `desc` is optional: when it is null the buffer is treated as
// non-updatable, otherwise its `isUpdatable` field is copied.
ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
    ur_context_handle_t context, ur_device_handle_t device,
    v2::raii::command_list_unique_handle &&commandList,
    const ur_exp_command_buffer_desc_t *desc)
    : commandListManager(context, device, std::move(commandList)),
      isUpdatable(desc != nullptr && desc->isUpdatable) {}

// Closes the Level Zero command list held by the command list manager so it
// can be dispatched. Returns UR_RESULT_SUCCESS when the close succeeds; a
// failing zeCommandListClose is handled by ZE2UR_CALL.
ur_result_t ur_exp_command_buffer_handle_t_::closeCommandList() {
  // Hold this object's mutex for the duration of the call: appending to a
  // command list from several threads at once is not allowed.
  std::scoped_lock lock(this->Mutex);

  // The argument expression is left exactly as written in case ZE2UR_CALL
  // includes it in its diagnostics.
  ZE2UR_CALL(zeCommandListClose, (this->commandListManager.getZeCommandList()));
  return UR_RESULT_SUCCESS;
}

namespace ur::level_zero {

// Creates a command buffer for `device` in `context`.
//
// The process is aborted by checkImmediateAppendSupport() if the driver lacks
// the immediate-append extension. If the platform does not report mutable
// command list support, UR_RESULT_ERROR_UNSUPPORTED_FEATURE is returned.
// Otherwise a regular command list is taken from the context's command list
// cache (compute queue group ordinal) and wrapped in a newly allocated
// handle that is stored in `*commandBuffer`.
//
// `commandBufferDesc` may be null; the handle constructor then treats the
// buffer as non-updatable.
//
// Any exception raised while acquiring the command list or allocating the
// handle is translated to a ur_result_t by exceptionToResult().
ur_result_t
urCommandBufferCreateExp(ur_context_handle_t context, ur_device_handle_t device,
                         const ur_exp_command_buffer_desc_t *commandBufferDesc,
                         ur_exp_command_buffer_handle_t *commandBuffer) try {
  checkImmediateAppendSupport(context);

  // Report the missing feature through the return value. Throwing the code
  // here used to bypass the handler below, which only caught std::bad_alloc.
  if (!context->getPlatform()->ZeMutableCmdListExt.Supported) {
    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  }

  using queue_group_type = ur_device_handle_t_::queue_group_info_t::type;
  uint32_t queueGroupOrdinal =
      device->QueueGroup[queue_group_type::Compute].ZeOrdinal;
  v2::raii::command_list_unique_handle zeCommandList =
      context->commandListCache.getRegularCommandList(device->ZeDevice, true,
                                                      queueGroupOrdinal, true);

  *commandBuffer = new ur_exp_command_buffer_handle_t_(
      context, device, std::move(zeCommandList), commandBufferDesc);
  return UR_RESULT_SUCCESS;

} catch (...) {
  // Catch everything, as the sibling entry points do, so that no exception
  // can propagate out of this API function.
  return exceptionToResult(std::current_exception());
}

// Adds one reference to `hCommandBuffer` and returns UR_RESULT_SUCCESS.
// The handle is dereferenced without a null check.
ur_result_t
urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
  hCommandBuffer->RefCount.increment();
  return UR_RESULT_SUCCESS;
} catch (...) {
  // Same catch-all handler as the other command-buffer entry points, so no
  // exception type can escape this API function.
  return exceptionToResult(std::current_exception());
}

// Drops one reference to `hCommandBuffer`; the handle is deleted when
// decrementAndTest() reports that this was the last reference. Always
// returns UR_RESULT_SUCCESS unless an exception is translated by
// exceptionToResult().
ur_result_t
urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
  const bool lastReference = hCommandBuffer->RefCount.decrementAndTest();
  if (lastReference) {
    delete hCommandBuffer;
  }
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Finalizes a command buffer by closing its command list.
//
// Returns UR_RESULT_ERROR_INVALID_NULL_POINTER for a null handle and
// UR_RESULT_ERROR_INVALID_OPERATION if the buffer was already finalized.
// A failure to close the command list is returned to the caller and leaves
// the buffer un-finalized.
ur_result_t
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) try {
  UR_ASSERT(hCommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  UR_ASSERT(!hCommandBuffer->isFinalized, UR_RESULT_ERROR_INVALID_OPERATION);

  // Propagate a failed close instead of discarding it; otherwise the buffer
  // would be flagged as finalized while its command list is still open.
  UR_CALL(hCommandBuffer->closeCommandList());

  hCommandBuffer->isFinalized = true;
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Appends a kernel launch to the command buffer's command list.
//
// Only the kernel and its work dimensions/sizes are forwarded to the command
// list manager; the launch is appended with an empty wait list and no signal
// event. Sync points, event wait lists, the output event, kernel
// alternatives and the output command handle are currently ignored.
ur_result_t urCommandBufferAppendKernelLaunchExp(
    ur_exp_command_buffer_handle_t commandBuffer, ur_kernel_handle_t hKernel,
    uint32_t workDim, const size_t *pGlobalWorkOffset,
    const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize,
    uint32_t numKernelAlternatives, ur_kernel_handle_t *kernelAlternatives,
    uint32_t numSyncPointsInWaitList,
    const ur_exp_command_buffer_sync_point_t *syncPointWaitList,
    uint32_t numEventsInWaitList, const ur_event_handle_t *eventWaitList,
    ur_exp_command_buffer_sync_point_t *retSyncPoint, ur_event_handle_t *event,
    ur_exp_command_buffer_command_handle_t *command) try {
  // TODO: kernel alternatives and the returned command handle are not
  // supported yet.
  std::ignore = numKernelAlternatives;
  std::ignore = kernelAlternatives;
  std::ignore = command;

  // Every command list is in-order, so sync points carry no extra ordering
  // information and can be dropped.
  std::ignore = numSyncPointsInWaitList;
  std::ignore = syncPointWaitList;
  std::ignore = retSyncPoint;

  // Open question: are these events meant to be checked before the kernel
  // executes, or before it is appended to the list? The latter is an easy
  // fix; the former is still TODO.
  std::ignore = numEventsInWaitList;
  std::ignore = eventWaitList;
  std::ignore = event;

  UR_CALL(commandBuffer->commandListManager.appendKernelLaunch(
      hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 0,
      nullptr, nullptr));
  return UR_RESULT_SUCCESS;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Submits the command buffer's command list to `hQueue`.
//
// The work is delegated to the queue's enqueueCommandBuffer(), which
// receives the raw command list handle, the optional output event and the
// wait list; its result is returned unchanged.
ur_result_t urCommandBufferEnqueueExp(
    ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue,
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) try {
  ze_command_list_handle_t zeCommandList =
      hCommandBuffer->commandListManager.getZeCommandList();

  return hQueue->enqueueCommandBuffer(zeCommandList, phEvent,
                                      numEventsInWaitList, phEventWaitList);
} catch (...) {
  return exceptionToResult(std::current_exception());
}

// Answers an information query about a command buffer.
//
// Supported properties:
//  - UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: current reference count.
//  - UR_EXP_COMMAND_BUFFER_INFO_DESCRIPTOR: a descriptor rebuilt from the
//    buffer's flags; isInOrder is always reported as true.
// Any other property trips an assert and, when asserts are compiled out,
// yields UR_RESULT_ERROR_INVALID_ENUMERATION.
ur_result_t
urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer,
                          ur_exp_command_buffer_info_t propName,
                          size_t propSize, void *pPropValue,
                          size_t *pPropSizeRet) try {
  UrReturnHelper returnValue(propSize, pPropValue, pPropSizeRet);

  switch (propName) {
  case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT:
    return returnValue(uint32_t{hCommandBuffer->RefCount.load()});

  case UR_EXP_COMMAND_BUFFER_INFO_DESCRIPTOR: {
    ur_exp_command_buffer_desc_t desc{};
    desc.stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC;
    desc.pNext = nullptr;
    desc.isUpdatable = hCommandBuffer->isUpdatable;
    desc.isInOrder = true;
    desc.enableProfiling = hCommandBuffer->isProfilingEnabled;
    return returnValue(desc);
  }

  default:
    assert(!"Command-buffer info request not implemented");
    break;
  }

  return UR_RESULT_ERROR_INVALID_ENUMERATION;
} catch (...) {
  return exceptionToResult(std::current_exception());
}

} // namespace ur::level_zero
57 changes: 57 additions & 0 deletions source/adapters/level_zero/v2/command_buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//===--------- command_buffer.hpp - Level Zero Adapter ---------------===//
//
// Copyright (C) 2024 Intel Corporation
//
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
// Exceptions. See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include "command_list_manager.hpp"
#include "common.hpp"
#include "context.hpp"
#include "kernel.hpp"
#include "queue_api.hpp"
#include <ze_api.h>

// Profiling data associated with a command buffer.
// NOTE(review): nothing in this file shows who allocates or frees
// `timestamps`, or how `numEvents` relates to its length; confirm at the use
// sites.
struct command_buffer_profiling_t {
  // Defaults to zero so a default-constructed value is never read
  // uninitialized.
  ur_exp_command_buffer_sync_point_t numEvents = 0;
  // Defaults to null for the same reason.
  ze_kernel_timestamp_result_t *timestamps = nullptr;
};

// Object behind a UR command-buffer handle in the v2 adapter. It wraps a
// command list manager together with a few state flags.
struct ur_exp_command_buffer_handle_t_ : public _ur_object {
  // `commandList` is handed over to the command list manager. `desc` may be
  // null.
  ur_exp_command_buffer_handle_t_(
      ur_context_handle_t context, ur_device_handle_t device,
      v2::raii::command_list_unique_handle &&commandList,
      const ur_exp_command_buffer_desc_t *desc);
  ~ur_exp_command_buffer_handle_t_() = default;

  // Closes the underlying command list so it is ready for dispatch.
  ur_result_t closeCommandList();

  // NOTE(review): declared only; no definition is visible alongside this
  // header, so its exact contract should be confirmed before use.
  ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent,
                                   ur_command_t commandType);

  // Owns the command list that commands are appended to.
  ur_command_list_manager commandListManager;

  // NOTE(review): not referenced by the visible implementation; presumably
  // reserved for event wait-list handling.
  std::vector<ze_event_handle_t> waitList;

  // True when commands may still be updated after the buffer is closed.
  bool isUpdatable = false;
  // True once the command buffer has been finalized.
  bool isFinalized = false;
  // True when profiling is enabled for this command buffer.
  bool isProfilingEnabled = false;
};

struct ur_exp_command_buffer_command_handle_t_ : public _ur_object {
ur_exp_command_buffer_command_handle_t_(ur_exp_command_buffer_handle_t,
uint64_t);

~ur_exp_command_buffer_command_handle_t_();

// Command-buffer of this command.
ur_exp_command_buffer_handle_t commandBuffer;
// L0 command ID identifying this command
uint64_t commandId;
};
Loading

0 comments on commit 4a916cc

Please sign in to comment.