Skip to content

Commit

Permalink
Merge pull request #2261 from againull/againull/2d_block_exp
Browse files Browse the repository at this point in the history
Add new device descriptor to query 2D block array capabilities of the Intel GPU
  • Loading branch information
callumfare authored Nov 27, 2024
2 parents 0a90db9 + c79df59 commit db83117
Show file tree
Hide file tree
Showing 25 changed files with 333 additions and 23 deletions.
27 changes: 26 additions & 1 deletion cmake/FetchLevelZero.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ set(UR_LEVEL_ZERO_LOADER_LIBRARY "" CACHE FILEPATH "Path of the Level Zero Loade
# Directory that holds the Level Zero headers. Declared as PATH (a directory)
# rather than FILEPATH (a single file) so that GUIs offer a directory chooser.
set(UR_LEVEL_ZERO_INCLUDE_DIR "" CACHE PATH "Directory containing the Level Zero Headers")
# Git repository and tag of the Level Zero loader sources.
set(UR_LEVEL_ZERO_LOADER_REPO "" CACHE STRING "Github repo to get the Level Zero loader sources from")
set(UR_LEVEL_ZERO_LOADER_TAG "" CACHE STRING "GIT tag of the Level Zero loader taken from github repo")
# Git repository and tag of the compute runtime sources.
set(UR_COMPUTE_RUNTIME_REPO "" CACHE STRING "Github repo to get the compute runtime sources from")
set(UR_COMPUTE_RUNTIME_TAG "" CACHE STRING "GIT tag of the compute runtime taken from github repo")

# Copy Level Zero loader/headers locally to the build to avoid leaking their path.
set(LEVEL_ZERO_COPY_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader)
Expand Down Expand Up @@ -87,8 +89,31 @@ target_link_libraries(LevelZeroLoader
INTERFACE "${LEVEL_ZERO_LIB_NAME}"
)

file(GLOB LEVEL_ZERO_LOADER_API_HEADERS "${LEVEL_ZERO_INCLUDE_DIR}/*.h")
file(COPY ${LEVEL_ZERO_LOADER_API_HEADERS} DESTINATION ${LEVEL_ZERO_INCLUDE_DIR}/level_zero)
add_library(LevelZeroLoader-Headers INTERFACE)
target_include_directories(LevelZeroLoader-Headers
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR}>"
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR};${LEVEL_ZERO_INCLUDE_DIR}/level_zero>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)

# Use the upstream compute-runtime repository and a pinned release tag when
# the corresponding cache variables are empty.
if(UR_COMPUTE_RUNTIME_REPO STREQUAL "")
  set(UR_COMPUTE_RUNTIME_REPO "https://github.com/intel/compute-runtime.git")
endif()
if(UR_COMPUTE_RUNTIME_TAG STREQUAL "")
  set(UR_COMPUTE_RUNTIME_TAG "24.39.31294.12")
endif()

include(FetchContent)

# Sparse fetch only the dir with level zero headers to avoid pulling in the
# entire compute-runtime.
FetchContentSparse_Declare(
  compute-runtime-level-zero-headers
  ${UR_COMPUTE_RUNTIME_REPO}
  "${UR_COMPUTE_RUNTIME_TAG}"
  "level_zero/include"
)

# Populate the content only once per configure.
FetchContent_GetProperties(compute-runtime-level-zero-headers)
if(NOT compute-runtime-level-zero-headers_POPULATED)
  FetchContent_Populate(compute-runtime-level-zero-headers)
endif()

# The include root is two directories above the fetched source dir.
# NOTE(review): presumably so that `level_zero/include/...` style includes
# resolve against it - confirm against FetchContentSparse_Declare.
set(COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE
  "${compute-runtime-level-zero-headers_SOURCE_DIR}/../.."
)
message(STATUS "Level Zero Adapter: Using Level Zero headers from ${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}")

# Header-only target that exposes the fetched headers to consumers.
add_library(ComputeRuntimeLevelZero-Headers INTERFACE)
target_include_directories(ComputeRuntimeLevelZero-Headers INTERFACE
  "$<BUILD_INTERFACE:${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}>"
  "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
25 changes: 24 additions & 1 deletion include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -1705,6 +1705,8 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native
///< work
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events.
UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, ///< [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field
///< of Intel GPU 2D block array capabilities
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1730,7 +1732,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName`
/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -7428,6 +7430,27 @@ urEnqueueWriteHostPipe(
///< an element of the phEventWaitList array.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities
#if !defined(__GNUC__)
#pragma region 2d_block_array_capabilities_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Intel GPU 2D block array capabilities
/// @details The `_flags_t` typedef is the 32-bit bit-field type; the `_flag_t`
///          enum names the individual bits that may be set in it.
typedef uint32_t ur_exp_device_2d_block_array_capability_flags_t;
typedef enum ur_exp_device_2d_block_array_capability_flag_t {
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD = UR_BIT(0), ///< Load instructions are supported
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE = UR_BIT(1), ///< Store instructions are supported
/// @cond
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_device_2d_block_array_capability_flag_t;
/// @brief Bit Mask for validating ur_exp_device_2d_block_array_capability_flags_t
/// @details 0xfffffffc has every bit set except bit 0 (LOAD) and bit 1 (STORE),
///          i.e. it selects the bits that do not correspond to a named flag.
///          NOTE(review): presumably a value is valid when `(value & MASK) == 0`
///          - confirm against the code that consumes this mask.
#define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 0xfffffffc

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t enum
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpDevice_2dBlockArrayCapabilityFlags(enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_image_copy_flag_t enum
/// @returns
Expand Down
78 changes: 78 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ inline ur_result_t printFlag<ur_map_flag_t>(std::ostream &os, uint32_t flag);
template <>
inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);

Expand Down Expand Up @@ -328,6 +331,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value);
Expand Down Expand Up @@ -2665,6 +2669,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP";
break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4472,6 +4479,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: {
const ur_exp_device_2d_block_array_capability_flags_t *tptr = (const ur_exp_device_2d_block_array_capability_flags_t *)ptr;
if (sizeof(ur_exp_device_2d_block_array_capability_flags_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_exp_device_2d_block_array_capability_flags_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

ur::details::printFlag<ur_exp_device_2d_block_array_capability_flag_t>(os,
*tptr);

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -9455,6 +9475,64 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Stream insertion operator for the ur_exp_device_2d_block_array_capability_flag_t type
/// @details Writes the enumerator's identifier, or "unknown enumerator" when
///          the value does not match a named enumerator.
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value) {
    // Start from the fallback text and override it for known enumerators.
    const char *label = "unknown enumerator";
    if (value == UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD) {
        label = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD";
    } else if (value == UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE) {
        label = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE";
    }
    os << label;
    return os;
}

namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t flag
/// @details Writes every known flag set in `flag`, separated by " | ". Any
///          leftover bits are reported as "unknown bit flags <32-bit binary>",
///          and a value of zero is printed as "0".
/// @returns
///     UR_RESULT_SUCCESS in all cases
template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag) {
    // Known flags, in the order they are printed.
    const ur_exp_device_2d_block_array_capability_flag_t knownFlags[] = {
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD,
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE,
    };

    uint32_t remaining = flag;
    bool printedAny = false;

    for (const ur_exp_device_2d_block_array_capability_flag_t known : knownFlags) {
        const uint32_t bits = (uint32_t)known;
        if ((remaining & bits) != bits) {
            continue;
        }
        // Clear the bits just recognised so only unknown ones are left over.
        remaining ^= bits;
        if (printedAny) {
            os << " | ";
        }
        printedAny = true;
        os << known;
    }

    if (remaining != 0) {
        if (printedAny) {
            os << " | ";
        }
        os << "unknown bit flags " << std::bitset<32>(remaining);
    } else if (!printedAny) {
        os << "0";
    }
    return UR_RESULT_SUCCESS;
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_image_copy_flag_t type
/// @returns
/// std::ostream &
Expand Down
62 changes: 62 additions & 0 deletions scripts/core/EXP-2D-BLOCK-ARRAY-CAPABILITIES.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<%
OneApi=tags['$OneApi']
x=tags['$x']
X=x.upper()
%>

.. _experimental-2D-block-array-capabilities:

================================================================================
2D Block Array Capabilities
================================================================================

.. warning::

Experimental features:

* May be replaced, updated, or removed at any time.
* Do not require maintaining API/ABI stability of their own additions over
time.
* Do not require conformance testing of their own additions.


Motivation
--------------------------------------------------------------------------------
Some Intel GPU devices support 2D block array operations which may be used to optimize applications on Intel GPUs.
This extension provides a device descriptor that allows querying the 2D block array capabilities of a device.

API
--------------------------------------------------------------------------------

Enums
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ${x}_device_info_t
* ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP

* ${x}_exp_device_2d_block_array_capability_flags_t
* ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD
* ${X}_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE

Changelog
--------------------------------------------------------------------------------

+-----------+------------------------+
| Revision | Changes |
+===========+========================+
| 1.0 | Initial Draft |
+-----------+------------------------+


Support
--------------------------------------------------------------------------------

Adapters which support this experimental feature *must* return ${X}_RESULT_SUCCESS from
the ${x}DeviceGetInfo call with the new ${X}_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP
device descriptor.


Contributors
--------------------------------------------------------------------------------

* Artur Gainullin `[email protected] <[email protected]>`_
36 changes: 36 additions & 0 deletions scripts/core/exp-2d-block-array-capabilities.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#
# Copyright (C) 2024 Intel Corporation
#
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# See YaML.md for syntax definition
#
--- #--------------------------------------------------------------------------
type: header
desc: "Intel $OneApi Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities"
ordinal: "99"
--- #--------------------------------------------------------------------------
type: enum
extend: true
typed_etors: true
desc: "Extension enum to $x_device_info_t to query Intel device 2D block array capabilities."
name: $x_device_info_t
etors:
- name: 2D_BLOCK_ARRAY_CAPABILITIES_EXP
value: "0x2022"
desc: "[$x_exp_device_2d_block_array_capability_flags_t] return a bit-field of Intel GPU 2D block array capabilities"
--- #--------------------------------------------------------------------------
type: enum
desc: "Intel GPU 2D block array capabilities"
class: $xDevice
name: $x_exp_device_2d_block_array_capability_flags_t
etors:
- name: LOAD
desc: "Load instructions are supported"
value: "$X_BIT(0)"
- name: STORE
desc: "Store instructions are supported"
value: "$X_BIT(1)"

4 changes: 3 additions & 1 deletion source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;

case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
return ReturnValue(
static_cast<ur_exp_device_2d_block_array_capability_flags_t>(0));
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
return ReturnValue(true);
Expand Down
3 changes: 3 additions & 0 deletions source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_IL_VERSION:
case UR_DEVICE_INFO_ASYNC_BARRIER:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
return ReturnValue(
static_cast<ur_exp_device_2d_block_array_capability_flags_t>(0));
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
int DriverVersion = 0;
UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion));
Expand Down
4 changes: 3 additions & 1 deletion source/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ if(UR_BUILD_ADAPTER_L0)
# 'utils' target from 'level-zero-loader' includes path which is prefixed
# in the source directory, this breaks the installation of 'utils' target.
set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "")
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils
install(TARGETS ur_umf LevelZeroLoader LevelZeroLoader-Headers ComputeRuntimeLevelZero-Headers ze_loader utils
EXPORT ${PROJECT_NAME}-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
Expand Down Expand Up @@ -109,6 +109,7 @@ if(UR_BUILD_ADAPTER_L0)
${PROJECT_NAME}::umf
LevelZeroLoader
LevelZeroLoader-Headers
ComputeRuntimeLevelZero-Headers
)

target_include_directories(ur_adapter_level_zero PRIVATE
Expand Down Expand Up @@ -203,6 +204,7 @@ if(UR_BUILD_ADAPTER_L0_V2)
${PROJECT_NAME}::umf
LevelZeroLoader
LevelZeroLoader-Headers
ComputeRuntimeLevelZero-Headers
)

target_include_directories(ur_adapter_level_zero_v2 PRIVATE
Expand Down
9 changes: 9 additions & 0 deletions source/adapters/level_zero/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "common.hpp"
#include "logger/ur_logger.hpp"
#include "usm.hpp"
#include <level_zero/include/ze_intel_gpu.h>

ur_result_t ze2urResult(ze_result_t ZeResult) {
if (ZeResult == ZE_RESULT_SUCCESS)
Expand Down Expand Up @@ -330,6 +331,14 @@ template <> zes_structure_type_t getZesStructureType<zes_mem_properties_t>() {
return ZES_STRUCTURE_TYPE_MEM_PROPERTIES;
}

// Specialization that maps ze_intel_device_block_array_exp_properties_t to its
// structure-type tag, ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES. It is only
// compiled when the included headers define
// ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME.
#ifdef ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
template <>
ze_structure_type_t
getZeStructureType<ze_intel_device_block_array_exp_properties_t>() {
return ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_PROPERTIES;
}
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME

// Global variables for ZER_EXT_RESULT_ADAPTER_SPECIFIC_ERROR
thread_local ur_result_t ErrorMessageCode = UR_RESULT_SUCCESS;
thread_local char ErrorMessage[MaxMessageSize];
Expand Down
Loading

0 comments on commit db83117

Please sign in to comment.