diff --git a/buildbot/configure.py b/buildbot/configure.py index f3a43857b7e1a..01abeda250049 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -81,6 +81,9 @@ def do_configure(args): sycl_build_pi_hip_platform = args.hip_platform sycl_enabled_plugins.append("hip") + if args.unified_runtime: + sycl_enabled_plugins.append("unified_runtime") + # all llvm compiler targets don't require 3rd party dependencies, so can be # built/tested even if specific runtimes are not available if args.enable_all_llvm_targets: @@ -220,6 +223,7 @@ def main(): metavar="BUILD_TYPE", default="Release", help="build type: Debug, Release") parser.add_argument("--cuda", action='store_true', help="switch from OpenCL to CUDA") parser.add_argument("--hip", action='store_true', help="switch from OpenCL to HIP") + parser.add_argument("--unified_runtime", action='store_true', help="switch from OpenCL to Unified Runtime") parser.add_argument("--hip-platform", type=str, choices=['AMD', 'NVIDIA'], default='AMD', help="choose hardware platform for HIP backend") parser.add_argument("--host-target", default='X86', help="host LLVM target architecture, defaults to X86, multiple targets may be provided as a semi-colon separated string") diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 86c5b420d4be6..4af02e08b505e 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -157,6 +157,9 @@ endif() if ("esimd_emulator" IN_LIST SYCL_ENABLE_PLUGINS) set(SYCL_BUILD_PI_ESIMD_EMULATOR ON) endif() +if ("unified_runtime" IN_LIST SYCL_ENABLE_PLUGINS) + set(SYCL_BUILD_UNIFIED_RUNTIME ON) +endif() # Configure SYCL version macro set(sycl_inc_dir ${CMAKE_CURRENT_SOURCE_DIR}/include) @@ -360,6 +363,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS file-table-tform level-zero-loader level-zero-headers + unified-runtime # TODO: is this needed? llc llvm-ar llvm-foreach diff --git a/sycl/include/sycl/backend_types.hpp b/sycl/include/sycl/backend_types.hpp index 563ff1ed60416..4e335bb72462a 100644 --- a/sycl/include/sycl/backend_types.hpp +++ b/sycl/include/sycl/backend_types.hpp @@ -32,6 +32,7 @@ enum class backend : char { ext_intel_esimd_emulator, ext_oneapi_hip = 6, hip __SYCL2020_DEPRECATED("use 'ext_oneapi_hip' instead") = ext_oneapi_hip, + unified_runtime = 7, }; template class backend_traits; @@ -63,6 +64,9 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { case backend::ext_oneapi_hip: Out << "ext_oneapi_hip"; break; + case backend::unified_runtime: + Out << "unified_runtime"; + break; case backend::all: Out << "all"; } diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 48585ffff38bd..5ef702cdc5344 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -67,18 +67,21 @@ bool trace(TraceLevel level); #define __SYCL_CUDA_PLUGIN_NAME "pi_cuda.dll" #define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "pi_esimd_emulator.dll" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dll" +#define __SYCL_UNFIED_RUNTIME_PLUGIN_NAME "TODO.dll" #elif defined(__SYCL_RT_OS_LINUX) #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.so" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.so" #define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.so" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.so" +#define __SYCL_UNFIED_RUNTIME_PLUGIN_NAME "libpi_unified_runtime.so" #elif defined(__SYCL_RT_OS_DARWIN) #define __SYCL_OPENCL_PLUGIN_NAME "libpi_opencl.dylib" #define __SYCL_LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.dylib" #define __SYCL_CUDA_PLUGIN_NAME "libpi_cuda.dylib" #define __SYCL_ESIMD_EMULATOR_PLUGIN_NAME "libpi_esimd_emulator.dylib" #define __SYCL_HIP_PLUGIN_NAME "libpi_hip.dylib" +#define __SYCL_UNFIED_RUNTIME_PLUGIN_NAME "TODO.dylib" #else #error "Unsupported OS" #endif diff --git a/sycl/plugins/CMakeLists.txt b/sycl/plugins/CMakeLists.txt index 85fe986249cb3..091b7d6b57fa1 100755 --- a/sycl/plugins/CMakeLists.txt +++ b/sycl/plugins/CMakeLists.txt @@ -8,8 +8,3 @@ foreach(plugin ${SYCL_ENABLE_PLUGINS}) add_subdirectory(${plugin}) endforeach() -# level_zero plugin depends today on unified_runtime plugin -# and unified_runtime plugin is not an independent plugin, adding it explicitly -if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) - add_subdirectory(unified_runtime) -endif() diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index 7b0ce7eaeeaab..807be52e74302 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -1,131 +1,100 @@ # PI Level Zero plugin library -if(MSVC) - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") -else() - set(LEVEL_ZERO_LOADER - "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() - if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) message(STATUS "Download Level Zero loader and headers from github.com") - if (CMAKE_C_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) - endif() - if (CMAKE_CXX_COMPILER) - list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) - endif() - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build) - set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero/level_zero_loader") - if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) - set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) - endif() - set(LEVEL_ZERO_LOADER_SOURCE_VERSION v1.8.8) - - #TODO: Replace ExternalProject with FetchContent for better maintainance and - # cmake files simplification - include(ExternalProject) - ExternalProject_Add(level-zero-loader - GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git - GIT_TAG ${LEVEL_ZERO_LOADER_SOURCE_VERSION} - UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} - SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build" - INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install" - CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} - -DCMAKE_INSTALL_PREFIX= - -DCMAKE_INSTALL_LIBDIR:PATH=lib${LLVM_LIBDIR_SUFFIX} - ${AUX_CMAKE_FLAGS} - LOG_DOWNLOAD 1 - LOG_UPDATE 1 - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - STEP_TARGETS configure,build,install - DEPENDS OpenCL-Headers - BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} - ) - ExternalProject_Add_Step(level-zero-loader llvminstall - COMMAND ${CMAKE_COMMAND} -E copy_directory /lib/ ${LLVM_BINARY_DIR}/lib - COMMAND ${CMAKE_COMMAND} -E copy_directory /include/ ${LLVM_BINARY_DIR}/include/sycl - COMMENT "Installing level-zero-loader into the LLVM binary directory" - DEPENDEES install - ) - install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/" - DESTINATION "." - COMPONENT level-zero-loader - ) + set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") + set(LEVEL_ZERO_LOADER_TAG v1.8.8) - set(LEVEL_ZERO_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/include/ - CACHE INTERNAL "Path containing Level_Zero header files.") -else() - file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") - get_filename_component(LEVEL_ZERO_LIB_NAME ${LEVEL_ZERO_LIBRARY} NAME) - add_custom_target(level-zero-loader - DEPENDS - ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LEVEL_ZERO_LIB_NAME} - ${LLVM_BINARY_DIR}/include/sycl/level_zero - ) - add_custom_command( - OUTPUT - ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LEVEL_ZERO_LIB_NAME} - ${LLVM_BINARY_DIR}/include/sycl/level_zero - COMMENT - "Copying Level Zero loader and headers" - COMMAND - ${CMAKE_COMMAND} -E copy ${LEVEL_ZERO_LIBRARY_SRC} ${LLVM_LIBRARY_OUTPUT_INTDIR} - COMMAND - ${CMAKE_COMMAND} -E copy_directory ${LEVEL_ZERO_INCLUDE_DIR} ${LLVM_BINARY_DIR}/include/sycl - DEPENDS - ${LEVEL_ZERO_LIBRARY} - ${LEVEL_ZERO_INCLUDE_DIR} + # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 + set(CMAKE_INCLUDE_CURRENT_DIR OFF) + + message(STATUS "Will fetch Level Zero Loader from ${LEVEL_ZERO_LOADER_REPO}") + include(FetchContent) + FetchContent_Declare(level-zero-loader + GIT_REPOSITORY ${LEVEL_ZERO_LOADER_REPO} + GIT_TAG ${LEVEL_ZERO_LOADER_TAG} ) -endif() -list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-loader level-zero-headers) + # Suppress some pedantic warnings for Level Zero build + set(CMAKE_CXX_FLAGS_BAK "${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-but-set-variable") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-truncation") -include_directories("${LEVEL_ZERO_INCLUDE_DIR}") + FetchContent_MakeAvailable(level-zero-loader) + FetchContent_GetProperties(level-zero-loader) + + # Restore original flags + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_BAK}") + + set(LEVEL_ZERO_LIBRARY ze_loader) + set(LEVEL_ZERO_INCLUDE_DIR + ${level-zero-loader_SOURCE_DIR}/include CACHE PATH "Path to Level Zero Headers") +endif() + +add_library (LevelZeroLoader INTERFACE) +target_link_libraries(LevelZeroLoader + INTERFACE "${LEVEL_ZERO_LIBRARY}" +) add_library (LevelZeroLoader-Headers INTERFACE) -add_library (LevelZeroLoader::Headers ALIAS LevelZeroLoader-Headers) target_include_directories(LevelZeroLoader-Headers INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" ) if (SYCL_ENABLE_XPTI_TRACING) set(XPTI_PROXY_SRC "${CMAKE_SOURCE_DIR}/../xpti/src/xpti_proxy.cpp") + set(XPTI_INCLUDE "${CMAKE_SOURCE_DIR}/../xpti/include") + set(XPTI_LIBS "${CMAKE_DL_LIBS}") endif() +find_package(Python3 REQUIRED) + +add_custom_target(ze-api + COMMAND ${Python3_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/ze_api_generator.py + ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h + BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def + ) + find_package(Threads REQUIRED) add_sycl_plugin(level_zero SOURCES - "${sycl_inc_dir}/sycl/detail/pi.h" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/usm_allocator.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp" + # These are short-term shared with Unified Runtime + # The two plugins define a few things differrently so must + # be built separately. This difference is spelled in + # their "ur_bindings.hpp" files. + # + "ur_bindings.hpp" + "../unified_runtime/pi2ur.hpp" + "../unified_runtime/pi2ur.cpp" + "../unified_runtime/ur/ur.hpp" + "../unified_runtime/ur/ur.cpp" + "../unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp" + "../unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp" + # Following are the PI Level-Zero Plugin only codes. + "pi_level_zero.cpp" + "pi_level_zero.hpp" + "usm_allocator.cpp" + "usm_allocator.hpp" + "tracing.cpp" ${XPTI_PROXY_SRC} + INCLUDE_DIRS + ${CMAKE_CURRENT_BINARY_DIR} # for ze_api.def + ${CMAKE_CURRENT_SOURCE_DIR} # for Level-Zero Plugin "ur_bindings.hpp" + ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime + ${XPTI_INCLUDE} LIBRARIES - "${LEVEL_ZERO_LOADER}" + LevelZeroLoader-Headers + UnifiedRuntime-Headers + LevelZeroLoader Threads::Threads - unified_runtime + ${XPTI_LIBS} ) -find_package(Python3 REQUIRED) - -add_custom_target(ze-api - COMMAND ${Python3_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/ze_api_generator.py - ${LEVEL_ZERO_INCLUDE_DIR}/level_zero/ze_api.h - BYPRODUCTS - ${CMAKE_CURRENT_BINARY_DIR}/ze_api.def - ) -target_include_directories(pi_level_zero PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) add_dependencies(pi_level_zero ze-api) if (SYCL_ENABLE_XPTI_TRACING) @@ -133,11 +102,4 @@ if (SYCL_ENABLE_XPTI_TRACING) XPTI_ENABLE_INSTRUMENTATION XPTI_STATIC_LIBRARY ) - target_include_directories(pi_level_zero PRIVATE "${CMAKE_SOURCE_DIR}/../xpti/include") - target_link_libraries(pi_level_zero PRIVATE ${CMAKE_DL_LIBS}) -endif() - -if (TARGET level-zero-loader) - add_dependencies(ze-api level-zero-loader) - add_dependencies(pi_level_zero level-zero-loader) endif() diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8659a42a058dc..d16472c114a70 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -24,8 +24,9 @@ #include #include -#include +#include +#include "ur_bindings.hpp" #include "usm_allocator.hpp" extern "C" { @@ -90,18 +91,12 @@ static const bool ReuseDiscardedEvents = [] { return std::stoi(ReuseDiscardedEventsFlag) > 0; }(); -// Controls PI level tracing prints. -static bool PrintPiTrace = false; - // Controls support of the indirect access kernels and deferred memory release. static const bool IndirectAccessTrackingEnabled = [] { return std::getenv("SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY") != nullptr; }(); -// This will count the calls to Level-Zero -static std::map *ZeCallCount = nullptr; - // Map from L0 to PI result static inline pi_result mapError(ze_result_t Result) { return ur2piResult(ze2urResult(Result)); @@ -118,7 +113,7 @@ static inline pi_result mapError(ze_result_t Result) { // Trace an internal PI call; returns in case of an error. #define PI_CALL(Call) \ { \ - if (PrintPiTrace) \ + if (PrintTrace) \ fprintf(stderr, "PI ---> %s\n", #Call); \ pi_result Result = (Call); \ if (Result != PI_SUCCESS) \ @@ -352,15 +347,6 @@ static bool CopyEngineRequested(pi_device Device) { } // Global variables used in PI_Level_Zero -// Note we only create a simple pointer variables such that C++ RT won't -// deallocate them automatically at the end of the main program. -// The heap memory allocated for these global variables reclaimed only when -// Sycl RT calls piTearDown(). -static std::vector *PiPlatformsCache = - new std::vector; -static sycl::detail::SpinLock *PiPlatformsCacheMutex = - new sycl::detail::SpinLock; -static bool PiPlatformCachePopulated = false; pi_result _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool, @@ -470,55 +456,6 @@ static pi_result enqueueMemCopyRectHelper( const pi_event *EventWaitList, pi_event *Event, bool PreferCopyEngine = false); -inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) { - switch (ZeError) { -#define ZE_ERRCASE(ERR) \ - case ERR: \ - ErrorString = "" #ERR; \ - break; - - ZE_ERRCASE(ZE_RESULT_SUCCESS) - ZE_ERRCASE(ZE_RESULT_NOT_READY) - ZE_ERRCASE(ZE_RESULT_ERROR_DEVICE_LOST) - ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY) - ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) - ZE_ERRCASE(ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) - ZE_ERRCASE(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS) - ZE_ERRCASE(ZE_RESULT_ERROR_NOT_AVAILABLE) - ZE_ERRCASE(ZE_RESULT_ERROR_UNINITIALIZED) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_VERSION) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ARGUMENT) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_HANDLE) - ZE_ERRCASE(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_POINTER) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SIZE) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_SIZE) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ENUMERATION) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION) - ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NATIVE_BINARY) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_NAME) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_NAME) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_FUNCTION_NAME) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE) - ZE_ERRCASE(ZE_RESULT_ERROR_OVERLAPPING_REGIONS) - ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED) - ZE_ERRCASE(ZE_RESULT_ERROR_UNKNOWN) - -#undef ZE_ERRCASE - default: - assert(false && "Unexpected Error code"); - } // switch -} - // Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR constexpr size_t MaxMessageSize = 256; thread_local pi_result ErrorMessageCode = PI_SUCCESS; @@ -538,26 +475,6 @@ pi_result piPluginGetLastError(char **message) { return ErrorMessageCode; } -ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, - const char *ZeArgs, bool TraceError) { - zePrint("ZE ---> %s%s\n", ZeName, ZeArgs); - - if (ZeDebug & ZE_DEBUG_CALL_COUNT) { - ++(*ZeCallCount)[ZeName]; - } - - if (ZeResult && TraceError) { - const char *ErrorString = "Unknown"; - zeParseError(ZeResult, ErrorString); - zePrint("Error (%s) in %s\n", ErrorString, ZeName); - } - return ZeResult; -} - -#define PI_ASSERT(condition, error) \ - if (!(condition)) \ - return error; - bool _pi_queue::doReuseDiscardedEvents() { return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents(); } @@ -1580,7 +1497,7 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, // traces incurs much different timings than real execution // ansyway, and many regression tests use it. // - bool CurrentlyEmpty = !PrintPiTrace && this->LastCommandEvent == nullptr; + bool CurrentlyEmpty = !PrintTrace && this->LastCommandEvent == nullptr; // The list can be empty if command-list only contains signals of proxy // events. It is possible that executeCommandList is called twice for the same @@ -2238,117 +2155,7 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule, pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, pi_uint32 *NumPlatforms) { - - static const char *PiTrace = std::getenv("SYCL_PI_TRACE"); - static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0; - if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces - PrintPiTrace = true; - } - - static std::once_flag ZeCallCountInitialized; - try { - std::call_once(ZeCallCountInitialized, []() { - if (ZeDebug & ZE_DEBUG_CALL_COUNT) { - ZeCallCount = new std::map; - } - }); - } catch (const std::bad_alloc &) { - return PI_ERROR_OUT_OF_HOST_MEMORY; - } catch (...) { - return PI_ERROR_UNKNOWN; - } - - if (NumEntries == 0 && Platforms != nullptr) { - return PI_ERROR_INVALID_VALUE; - } - if (Platforms == nullptr && NumPlatforms == nullptr) { - return PI_ERROR_INVALID_VALUE; - } - - // Setting these environment variables before running zeInit will enable the - // validation layer in the Level Zero loader. - if (ZeDebug & ZE_DEBUG_VALIDATION) { - setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1"); - setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1"); - } - - // Enable SYSMAN support for obtaining the PCI address - // and maximum memory bandwidth. - if (getenv("SYCL_ENABLE_PCI") != nullptr) { - setEnvVar("ZES_ENABLE_SYSMAN", "1"); - } - - // TODO: We can still safely recover if something goes wrong during the init. - // Implement handling segfault using sigaction. - - // We must only initialize the driver once, even if piPlatformsGet() is called - // multiple times. Declaring the return value as "static" ensures it's only - // called once. - static ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit, (0)); - - // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. - if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { - PI_ASSERT(NumPlatforms != 0, PI_ERROR_INVALID_VALUE); - *NumPlatforms = 0; - return PI_SUCCESS; - } - - if (ZeResult != ZE_RESULT_SUCCESS) { - zePrint("zeInit: Level Zero initialization failure\n"); - return mapError(ZeResult); - } - - // Cache pi_platforms for reuse in the future - // It solves two problems; - // 1. sycl::platform equality issue; we always return the same pi_platform. - // 2. performance; we can save time by immediately return from cache. - // - - const std::lock_guard Lock{*PiPlatformsCacheMutex}; - if (!PiPlatformCachePopulated) { - try { - // Level Zero does not have concept of Platforms, but Level Zero driver is - // the closest match. - uint32_t ZeDriverCount = 0; - ZE_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); - if (ZeDriverCount == 0) { - PiPlatformCachePopulated = true; - } else { - std::vector ZeDrivers; - ZeDrivers.resize(ZeDriverCount); - - ZE_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data())); - for (uint32_t I = 0; I < ZeDriverCount; ++I) { - pi_platform Platform = new _pi_platform(ZeDrivers[I]); - // Save a copy in the cache for future uses. - PiPlatformsCache->push_back(Platform); - - pi_result Result = Platform->initialize(); - if (Result != PI_SUCCESS) { - return Result; - } - } - PiPlatformCachePopulated = true; - } - } catch (const std::bad_alloc &) { - return PI_ERROR_OUT_OF_HOST_MEMORY; - } catch (...) { - return PI_ERROR_UNKNOWN; - } - } - - // Populate returned platforms from the cache. - if (Platforms) { - PI_ASSERT(NumEntries <= PiPlatformsCache->size(), - PI_ERROR_INVALID_PLATFORM); - std::copy_n(PiPlatformsCache->begin(), NumEntries, Platforms); - } - - if (NumPlatforms) { - *NumPlatforms = PiPlatformsCache->size(); - } - - return PI_SUCCESS; + return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms); } pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, @@ -3368,10 +3175,10 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, // TODO: maybe we should populate cache of platforms if it wasn't already. // For now assert that is was populated. PI_ASSERT(PiPlatformCachePopulated, PI_ERROR_INVALID_VALUE); - const std::lock_guard Lock{*PiPlatformsCacheMutex}; + const std::lock_guard Lock{*PiPlatformsCacheMutex}; pi_device Dev = nullptr; - for (auto &ThePlatform : *PiPlatformsCache) { + for (pi_platform ThePlatform : *PiPlatformsCache) { Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice); if (Dev) { // Check that the input Platform, if was given, matches the found one. @@ -8014,8 +7821,7 @@ pi_result USMSharedReadOnlyMemoryAlloc::allocateImpl(void **ResultPtr, pi_uint32 Alignment) { pi_usm_mem_properties Props[] = {PI_MEM_ALLOC_FLAGS, PI_MEM_ALLOC_DEVICE_READ_ONLY, 0}; - return USMSharedAllocImpl(ResultPtr, Context, Device, Props, Size, - Alignment); + return USMSharedAllocImpl(ResultPtr, Context, Device, Props, Size, Alignment); } pi_result USMDeviceMemoryAlloc::allocateImpl(void **ResultPtr, size_t Size, @@ -8781,7 +8587,7 @@ pi_result piTearDown(void *PluginParameter) { (void)PluginParameter; bool LeakFound = false; // reclaim pi_platform objects here since we don't have piPlatformRelease. - for (pi_platform &Platform : *PiPlatformsCache) { + for (pi_platform Platform : *PiPlatformsCache) { delete Platform; } delete PiPlatformsCache; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 2712120e2f940..dc43e97ea3bc7 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -40,13 +40,13 @@ #include #include -#include -#include #include +#include +#include // Share code between this PI L0 Plugin and UR L0 Adapter -#include #include +#include #include "usm_allocator.hpp" @@ -56,7 +56,7 @@ template To pi_cast(From Value) { return (To)(Value); } -template <> uint32_t pi_cast(uint64_t Value) { +template <> uint32_t inline pi_cast(uint64_t Value) { // Cast value and check that we don't lose any information. uint32_t CastedValue = (uint32_t)(Value); assert((uint64_t)CastedValue == Value); @@ -178,13 +178,8 @@ struct MemAllocRecord : _pi_object { // Define the types that are opaque in pi.h in a manner suitabale for Level Zero // plugin -struct _pi_platform : public _ur_level_zero_platform { - _pi_platform(ze_driver_handle_t Driver) : _ur_level_zero_platform{Driver} {} - - // Performs initialization of a newly constructed PI platform. - pi_result initialize() { - return ur2piResult(_ur_level_zero_platform::initialize()); - } +struct _pi_platform : public ur_adapter_platform_handle_t_ { + using ur_adapter_platform_handle_t_::ur_adapter_platform_handle_t_; // Cache pi_devices for reuse std::vector> PiDevicesCache; diff --git a/sycl/plugins/level_zero/tracing.cpp b/sycl/plugins/level_zero/tracing.cpp index 32ee0f31fec90..17ea79359ea1f 100644 --- a/sycl/plugins/level_zero/tracing.cpp +++ b/sycl/plugins/level_zero/tracing.cpp @@ -8,9 +8,9 @@ #include "xpti/xpti_data_types.h" #include -#include -#include +#include #include +#include #include diff --git a/sycl/plugins/level_zero/ur_bindings.hpp b/sycl/plugins/level_zero/ur_bindings.hpp new file mode 100644 index 0000000000000..e37c0a7cf3e63 --- /dev/null +++ b/sycl/plugins/level_zero/ur_bindings.hpp @@ -0,0 +1,17 @@ +//===------ ur_bindings.hpp - Complete definitions of UR handles -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// +#pragma once + +#include "pi_level_zero.hpp" +#include + +// Make the Unified Runtime handles definition complete. +// This is used in various "create" API where new handles are allocated. +struct ur_platform_handle_t_ : public _pi_platform { + using _pi_platform::_pi_platform; +}; diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index f442db4a4f942..9c663a081f009 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -1,78 +1,49 @@ # PI Unified Runtime plugin library # - -include(FetchContent) - -set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") -set(UNIFIED_RUNTIME_TAG fd711c920acc4434cb52ff18b078c082d9d7f44d) - -message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") -FetchContent_Declare(unified-runtime - GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} - GIT_TAG ${UNIFIED_RUNTIME_TAG} +if (NOT DEFINED UNIFIED_RUNTIME_INCLUDE_DIR) + include(FetchContent) + + # TODO use oneapi-src once fixes are merged + set(UNIFIED_RUNTIME_REPO "https://github.com/igchor/unified-runtime.git") + set(UNIFIED_RUNTIME_TAG c1724990f3e37df08d9607f636385f65dbecb16f) + + message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") + FetchContent_Declare(unified-runtime + GIT_REPOSITORY ${UNIFIED_RUNTIME_REPO} + GIT_TAG ${UNIFIED_RUNTIME_TAG} + ) + + FetchContent_MakeAvailable(unified-runtime) + FetchContent_GetProperties(unified-runtime) + + set(UNIFIED_RUNTIME_LIBRARY unified-runtime::loader) + set(UNIFIED_RUNTIME_SOURCE_DIR + ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") + set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include") +endif() + +add_library (UnifiedRuntimeLoader INTERFACE) +target_link_libraries(UnifiedRuntimeLoader + INTERFACE "${UNIFIED_RUNTIME_LIBRARY}" ) -FetchContent_MakeAvailable(unified-runtime) -FetchContent_GetProperties(unified-runtime) - -set(UNIFIED_RUNTIME_SOURCE_DIR - ${unified-runtime_SOURCE_DIR} CACHE PATH "Path to Unified Runtime Headers") -set(UNIFIED_RUNTIME_INCLUDE_DIR "${UNIFIED_RUNTIME_SOURCE_DIR}/include") - - -#include_directories("${LEVEL_ZERO_INCLUDE_DIR}") -include_directories("${UNIFIED_RUNTIME_INCLUDE_DIR}") - add_library (UnifiedRuntime-Headers INTERFACE) target_include_directories(UnifiedRuntime-Headers - INTERFACE "${UNIFIED_RUNTIME_INCLUDE_DIR}" + INTERFACE + "${UNIFIED_RUNTIME_INCLUDE_DIR}" ) -list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS unified-runtime) - find_package(Threads REQUIRED) -# -# NOTE: the Unified Runtime doesn't have the loader [yet]. -# So what we really build is the Unified Runtime with Level Zero Adapter -# together. -# -# TODO: begin -# Unified Runtime today is not a plugin yet. It's experimental static -# library that's linked into level_zero plugin. As soon as it's ready to -# become a plugin code below should be replaced with the following: -#add_sycl_plugin(unified_runtime -# SOURCES -# "${sycl_inc_dir}/sycl/detail/pi.h" -# "${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.cpp" -# "${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.hpp" -# "${CMAKE_CURRENT_SOURCE_DIR}/ur.hpp" -# "${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.hpp" -# "${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.cpp" -# LIBRARIES -# Threads::Threads -# UnifiedRuntime-Headers -# LevelZeroLoader-Headers -#) - -add_library(unified_runtime STATIC - "${sycl_inc_dir}/sycl/detail/pi.h" - "${CMAKE_CURRENT_SOURCE_DIR}/ur.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.hpp" - "${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.cpp" +# Build Unified Runtime Level Zero Adapter +add_subdirectory(ur/adapters/level_zero) + +add_sycl_plugin(unified_runtime + SOURCES + "pi_unified_runtime.hpp" + "pi_unified_runtime.cpp" + LIBRARIES + Threads::Threads + UnifiedRuntimeLoader + UnifiedRuntime-Headers ) - -target_include_directories(unified_runtime INTERFACE - "${UNIFIED_RUNTIME_INCLUDE_DIR}" - "${CMAKE_CURRENT_SOURCE_DIR}" -) - -target_link_libraries(unified_runtime PRIVATE - UnifiedRuntime-Headers - LevelZeroLoader-Headers -) - -# TODO: end - -add_dependencies(unified_runtime ze-api) - diff --git a/sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.cpp deleted file mode 100644 index 71d184d29a8a8..0000000000000 --- a/sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//===--------- ur_level_zero.hpp - Level Zero Adapter -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===-----------------------------------------------------------------===// - -#include - -#include "ur_level_zero.hpp" - -// Define the static class field -std::mutex ZeCall::GlobalLock; - -ZeUSMImportExtension ZeUSMImport; - -void zePrint(const char *Format, ...) { - if (ZeDebug & ZE_DEBUG_BASIC) { - va_list Args; - va_start(Args, Format); - vfprintf(stderr, Format, Args); - va_end(Args); - } -} - -// This function will ensure compatibility with both Linux and Windows for -// setting environment variables. -bool setEnvVar(const char *name, const char *value) { -#ifdef _WIN32 - int Res = _putenv_s(name, value); -#else - int Res = setenv(name, value, 1); -#endif - if (Res != 0) { - zePrint( - "Level Zero plugin was unable to set the environment variable: %s\n", - name); - return false; - } - return true; -} - -// Trace a call to Level-Zero RT -#define ZE_CALL(ZeName, ZeArgs) \ - { \ - ze_result_t ZeResult = ZeName ZeArgs; \ - if (auto Result = ZeCall().doCall(ZeResult, #ZeName, #ZeArgs, true)) \ - return ze2urResult(Result); \ - } - -// Specializations for various L0 structures -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_FENCE_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_CONTEXT_DESC; -} -template <> -ze_structure_type_t -getZeStructureType() { - return ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_IMAGE_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_MODULE_DESC; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_KERNEL_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_EVENT_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_SAMPLER_DESC; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_MODULE_PROPERTIES; -} -template <> ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; -} -template <> -ze_structure_type_t getZeStructureType() { - return ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; -} - -template <> zes_structure_type_t getZesStructureType() { - return ZES_STRUCTURE_TYPE_PCI_PROPERTIES; -} - -template <> zes_structure_type_t getZesStructureType() { - return ZES_STRUCTURE_TYPE_MEM_STATE; -} - -template <> zes_structure_type_t getZesStructureType() { - return ZES_STRUCTURE_TYPE_MEM_PROPERTIES; -} - -zer_result_t _ur_level_zero_platform::initialize() { - // Cache driver properties - ZeStruct ZeDriverProperties; - ZE_CALL(zeDriverGetProperties, (ZeDriver, &ZeDriverProperties)); - uint32_t DriverVersion = ZeDriverProperties.driverVersion; - // Intel Level-Zero GPU driver stores version as: - // | 31 - 24 | 23 - 16 | 15 - 0 | - // | Major | Minor | Build | - auto VersionMajor = std::to_string((DriverVersion & 0xFF000000) >> 24); - auto VersionMinor = std::to_string((DriverVersion & 0x00FF0000) >> 16); - auto VersionBuild = std::to_string(DriverVersion & 0x0000FFFF); - ZeDriverVersion = VersionMajor + "." + VersionMinor + "." + VersionBuild; - - ZE_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); - ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + - std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); - - // Cache driver extension properties - uint32_t Count = 0; - ZE_CALL(zeDriverGetExtensionProperties, (ZeDriver, &Count, nullptr)); - - std::vector ZeExtensions(Count); - - ZE_CALL(zeDriverGetExtensionProperties, - (ZeDriver, &Count, ZeExtensions.data())); - - for (auto extension : ZeExtensions) { - // Check if global offset extension is available - if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME, - strlen(ZE_GLOBAL_OFFSET_EXP_NAME) + 1) == 0) { - if (extension.version == ZE_GLOBAL_OFFSET_EXP_VERSION_1_0) { - ZeDriverGlobalOffsetExtensionFound = true; - } - } - // Check if extension is available for "static linking" (compiling multiple - // SPIR-V modules together into one Level Zero module). - if (strncmp(extension.name, ZE_MODULE_PROGRAM_EXP_NAME, - strlen(ZE_MODULE_PROGRAM_EXP_NAME) + 1) == 0) { - if (extension.version == ZE_MODULE_PROGRAM_EXP_VERSION_1_0) { - ZeDriverModuleProgramExtensionFound = true; - } - } - zeDriverExtensionMap[extension.name] = extension.version; - } - - // Check if import user ptr into USM feature has been requested. - // If yes, then set up L0 API pointers if the platform supports it. - ZeUSMImport.setZeUSMImport(this); - - return ZER_RESULT_SUCCESS; -} diff --git a/sycl/plugins/unified_runtime/pi2ur.cpp b/sycl/plugins/unified_runtime/pi2ur.cpp index a10ca7534ef14..a80fa92f55c31 100644 --- a/sycl/plugins/unified_runtime/pi2ur.cpp +++ b/sycl/plugins/unified_runtime/pi2ur.cpp @@ -8,34 +8,3 @@ // This thin layer performs conversion from PI API to Unified Runtime API // TODO: remove when SYCL RT is changed to talk in UR directly - -#include -#include - -// Early exits on any error -#define HANDLE_ERRORS(urCall) \ - if (auto Result = urCall) \ - return ur2piResult(Result); - -__SYCL_EXPORT pi_result piPlatformsGet(pi_uint32 num_entries, - pi_platform *platforms, - pi_uint32 *num_platforms) { - - // https://spec.oneapi.io/unified-runtime/latest/core/api.html#zerplatformget - - uint32_t Count = num_entries; - auto phPlatforms = reinterpret_cast(platforms); - HANDLE_ERRORS(zerPlatformGet(&Count, phPlatforms)); - if (*num_platforms) { - *num_platforms = Count; - } - return PI_SUCCESS; -} - -__SYCL_EXPORT pi_result piPlatformGetInfo(pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, - void *param_value, - size_t *param_value_size_ret) { - die("Unified Runtime: piPlatformGetInfo is not implemented"); -} diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp old mode 100755 new mode 100644 index d461bac4a2cfb..8efa8f3fd36ee --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -1,4 +1,4 @@ -//===---------------- pi2ur.cpp - PI API to UR API --------------------==// +//===---------------- pi2ur.hpp - PI API to UR API --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,32 +9,34 @@ #include -#include "zer_api.h" +#include "ur_api.h" #include +#include // Map of UR error codes to PI error codes -static pi_result ur2piResult(zer_result_t urResult) { +static pi_result ur2piResult(ur_result_t urResult) { // TODO: replace "global lifetime" objects with a non-trivial d'tor with // either pointers to such objects (which would be allocated and dealocated // during init and teardown) or objects with trivial d'tor. // E.g. for this case we could have an std::array with sorted values. // - static std::unordered_map ErrorMapping = { - {ZER_RESULT_SUCCESS, PI_SUCCESS}, - {ZER_RESULT_ERROR_DEVICE_LOST, PI_ERROR_DEVICE_NOT_FOUND}, - {ZER_RESULT_INVALID_OPERATION, PI_ERROR_INVALID_OPERATION}, - {ZER_RESULT_INVALID_PLATFORM, PI_ERROR_INVALID_PLATFORM}, - {ZER_RESULT_ERROR_INVALID_ARGUMENT, PI_ERROR_INVALID_ARG_VALUE}, - {ZER_RESULT_INVALID_VALUE, PI_ERROR_INVALID_VALUE}, - {ZER_RESULT_INVALID_EVENT, PI_ERROR_INVALID_EVENT}, - {ZER_RESULT_INVALID_BINARY, PI_ERROR_INVALID_BINARY}, - {ZER_RESULT_INVALID_KERNEL_NAME, PI_ERROR_INVALID_KERNEL_NAME}, - {ZER_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_ERROR_BUILD_PROGRAM_FAILURE}, - {ZER_RESULT_INVALID_WORK_GROUP_SIZE, PI_ERROR_INVALID_WORK_GROUP_SIZE}, - {ZER_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_ERROR_BUILD_PROGRAM_FAILURE}, - {ZER_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, PI_ERROR_OUT_OF_RESOURCES}, - {ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY, PI_ERROR_OUT_OF_HOST_MEMORY}}; + static std::unordered_map ErrorMapping = { + {UR_RESULT_SUCCESS, PI_SUCCESS}, + {UR_RESULT_ERROR_UNKNOWN, PI_ERROR_UNKNOWN}, + {UR_RESULT_ERROR_DEVICE_LOST, PI_ERROR_DEVICE_NOT_FOUND}, + {UR_RESULT_ERROR_INVALID_OPERATION, PI_ERROR_INVALID_OPERATION}, + {UR_RESULT_ERROR_INVALID_PLATFORM, PI_ERROR_INVALID_PLATFORM}, + {UR_RESULT_ERROR_INVALID_ARGUMENT, PI_ERROR_INVALID_ARG_VALUE}, + {UR_RESULT_ERROR_INVALID_VALUE, PI_ERROR_INVALID_VALUE}, + {UR_RESULT_ERROR_INVALID_EVENT, PI_ERROR_INVALID_EVENT}, + {UR_RESULT_ERROR_INVALID_BINARY, PI_ERROR_INVALID_BINARY}, + {UR_RESULT_ERROR_INVALID_KERNEL_NAME, PI_ERROR_INVALID_KERNEL_NAME}, + {UR_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_ERROR_BUILD_PROGRAM_FAILURE}, + {UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE, PI_ERROR_INVALID_WORK_GROUP_SIZE}, + {UR_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_ERROR_BUILD_PROGRAM_FAILURE}, + {UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, PI_ERROR_OUT_OF_RESOURCES}, + {UR_RESULT_ERROR_OUT_OF_HOST_MEMORY, PI_ERROR_OUT_OF_HOST_MEMORY}}; auto It = ErrorMapping.find(urResult); if (It == ErrorMapping.end()) { @@ -42,3 +44,26 @@ static pi_result ur2piResult(zer_result_t urResult) { } return It->second; } + +// Early exits on any error +#define HANDLE_ERRORS(urCall) \ + if (auto Result = urCall) \ + return ur2piResult(Result); + +namespace pi2ur { +inline pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, + pi_uint32 *num_platforms) { + + // https://spec.oneapi.io/unified-runtime/latest/core/api.html#zerplatformget + + auto phPlatforms = reinterpret_cast(platforms); + return ur2piResult(urPlatformGet(num_entries, phPlatforms, num_platforms)); +} + +inline pi_result piPlatformGetInfo(pi_platform platform, + pi_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + die("Unified Runtime: piPlatformGetInfo is not implemented"); +} +} // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp new file mode 100644 index 0000000000000..38e971b345443 --- /dev/null +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -0,0 +1,31 @@ +//===--- pi_unified_runtime.cpp - Unified Runtime PI Plugin -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// + +#include + +extern "C" { +__SYCL_EXPORT pi_result piPlatformsGet(pi_uint32 num_entries, + pi_platform *platforms, + pi_uint32 *num_platforms) { + return pi2ur::piPlatformsGet(num_entries, platforms, num_platforms); +} + +pi_result piPluginInit(pi_plugin *PluginInit) { + PI_ASSERT(PluginInit, PI_ERROR_INVALID_VALUE); + + // Check that the major version matches in PiVersion and SupportedVersion + // TODO + + (PluginInit->PiFunctionTable).piPlatformsGet = &piPlatformsGet; + + return PI_SUCCESS; +} + +pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } + +} // extern "C diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.hpp b/sycl/plugins/unified_runtime/pi_unified_runtime.hpp new file mode 100644 index 0000000000000..1d40e4b9eb559 --- /dev/null +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.hpp @@ -0,0 +1,8 @@ +//===--- pi_unified_runtime.hpp - Unified Runtime PI Plugin -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// +#pragma once diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/CMakeLists.txt b/sycl/plugins/unified_runtime/ur/adapters/level_zero/CMakeLists.txt new file mode 100644 index 0000000000000..524bee48526f1 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/CMakeLists.txt @@ -0,0 +1,37 @@ +set(TARGET_NAME ur_level_zero) + +add_library(${TARGET_NAME} + SHARED + ${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.hpp + # These are short-term shared with Unified Runtime + # The two plugins define a few things differrently so must + # be built separately. This difference is spelled in + # their "ur_bindings.hpp" files. + # + "../../../ur_bindings.hpp" + "../../../pi2ur.hpp" + "../../../pi2ur.cpp" + "../../ur.hpp" + "../../ur.cpp" +) + +# set_target_properties(${TARGET_NAME} PROPERTIES +# VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}" +# SOVERSION "${PROJECT_VERSION_MAJOR}" +# ) + +target_include_directories(${TARGET_NAME} + PRIVATE "${LEVEL_ZERO_INCLUDE_DIR}" +) + +target_link_libraries(${TARGET_NAME} + PRIVATE + LevelZeroLoader-Headers + LevelZeroLoader +) + +# if(UNIX) +# set(GCC_COVERAGE_COMPILE_FLAGS "-fvisibility=hidden -fvisibility-inlines-hidden -fno-strict-aliasing") +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GCC_COVERAGE_COMPILE_FLAGS}") +# endif() diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp new file mode 100644 index 0000000000000..f3e43e8d6dab8 --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -0,0 +1,393 @@ +//===--------- ur_level_zero.hpp - Level Zero Adapter -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include +#include + +#include "ur_level_zero.hpp" +#include + +// Define the static class field +std::mutex ZeCall::GlobalLock; + +ZeUSMImportExtension ZeUSMImport; + +void zePrint(const char *Format, ...) { + if (ZeDebug & ZE_DEBUG_BASIC) { + va_list Args; + va_start(Args, Format); + vfprintf(stderr, Format, Args); + va_end(Args); + } +} + +// This function will ensure compatibility with both Linux and Windows for +// setting environment variables. +bool setEnvVar(const char *name, const char *value) { +#ifdef _WIN32 + int Res = _putenv_s(name, value); +#else + int Res = setenv(name, value, 1); +#endif + if (Res != 0) { + zePrint( + "Level Zero plugin was unable to set the environment variable: %s\n", + name); + return false; + } + return true; +} + +// Trace a call to Level-Zero RT +#define ZE_CALL(ZeName, ZeArgs) \ + { \ + ze_result_t ZeResult = ZeName ZeArgs; \ + if (auto Result = ZeCall().doCall(ZeResult, #ZeName, #ZeArgs, true)) \ + return ze2urResult(Result); \ + } + +// This will count the calls to Level-Zero +std::map *ZeCallCount = nullptr; + +inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) { + switch (ZeError) { +#define ZE_ERRCASE(ERR) \ + case ERR: \ + ErrorString = "" #ERR; \ + break; + + ZE_ERRCASE(ZE_RESULT_SUCCESS) + ZE_ERRCASE(ZE_RESULT_NOT_READY) + ZE_ERRCASE(ZE_RESULT_ERROR_DEVICE_LOST) + ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY) + ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) + ZE_ERRCASE(ZE_RESULT_ERROR_MODULE_BUILD_FAILURE) + ZE_ERRCASE(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS) + ZE_ERRCASE(ZE_RESULT_ERROR_NOT_AVAILABLE) + ZE_ERRCASE(ZE_RESULT_ERROR_UNINITIALIZED) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_VERSION) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ARGUMENT) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_HANDLE) + ZE_ERRCASE(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_POINTER) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SIZE) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_SIZE) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ENUMERATION) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION) + ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NATIVE_BINARY) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_NAME) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_NAME) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_FUNCTION_NAME) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE) + ZE_ERRCASE(ZE_RESULT_ERROR_OVERLAPPING_REGIONS) + ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED) + ZE_ERRCASE(ZE_RESULT_ERROR_UNKNOWN) + +#undef ZE_ERRCASE + default: + assert(false && "Unexpected Error code"); + } // switch +} + +ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, + const char *ZeArgs, bool TraceError) { + zePrint("ZE ---> %s%s\n", ZeName, ZeArgs); + + if (ZeDebug & ZE_DEBUG_CALL_COUNT) { + ++(*ZeCallCount)[ZeName]; + } + + if (ZeResult && TraceError) { + const char *ErrorString = "Unknown"; + zeParseError(ZeResult, ErrorString); + zePrint("Error (%s) in %s\n", ErrorString, ZeName); + } + return ZeResult; +} + +// Specializations for various L0 structures +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_EVENT_POOL_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_FENCE_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_CONTEXT_DESC; +} +template <> +ze_structure_type_t +getZeStructureType() { + return ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_IMAGE_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_MODULE_DESC; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_MODULE_PROGRAM_EXP_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_KERNEL_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_EVENT_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_SAMPLER_DESC; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_IMAGE_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_MODULE_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_CACHE_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_MODULE_PROPERTIES; +} +template <> ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_KERNEL_PROPERTIES; +} +template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; +} + +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_PCI_PROPERTIES; +} + +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_MEM_STATE; +} + +template <> zes_structure_type_t getZesStructureType() { + return ZES_STRUCTURE_TYPE_MEM_PROPERTIES; +} + +ur_result_t ur_adapter_platform_handle_t_::initialize() { + // Cache driver properties + ZeStruct ZeDriverProperties; + ZE_CALL(zeDriverGetProperties, (ZeDriver, &ZeDriverProperties)); + uint32_t DriverVersion = ZeDriverProperties.driverVersion; + // Intel Level-Zero GPU driver stores version as: + // | 31 - 24 | 23 - 16 | 15 - 0 | + // | Major | Minor | Build | + auto VersionMajor = std::to_string((DriverVersion & 0xFF000000) >> 24); + auto VersionMinor = std::to_string((DriverVersion & 0x00FF0000) >> 16); + auto VersionBuild = std::to_string(DriverVersion & 0x0000FFFF); + ZeDriverVersion = VersionMajor + "." + VersionMinor + "." + VersionBuild; + + ZE_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); + ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + + std::to_string(ZE_MINOR_VERSION(ZeApiVersion)); + + // Cache driver extension properties + uint32_t Count = 0; + ZE_CALL(zeDriverGetExtensionProperties, (ZeDriver, &Count, nullptr)); + + std::vector ZeExtensions(Count); + + ZE_CALL(zeDriverGetExtensionProperties, + (ZeDriver, &Count, ZeExtensions.data())); + + for (auto extension : ZeExtensions) { + // Check if global offset extension is available + if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME, + strlen(ZE_GLOBAL_OFFSET_EXP_NAME) + 1) == 0) { + if (extension.version == ZE_GLOBAL_OFFSET_EXP_VERSION_1_0) { + ZeDriverGlobalOffsetExtensionFound = true; + } + } + // Check if extension is available for "static linking" (compiling multiple + // SPIR-V modules together into one Level Zero module). + if (strncmp(extension.name, ZE_MODULE_PROGRAM_EXP_NAME, + strlen(ZE_MODULE_PROGRAM_EXP_NAME) + 1) == 0) { + if (extension.version == ZE_MODULE_PROGRAM_EXP_VERSION_1_0) { + ZeDriverModuleProgramExtensionFound = true; + } + } + zeDriverExtensionMap[extension.name] = extension.version; + } + + // Check if import user ptr into USM feature has been requested. + // If yes, then set up L0 API pointers if the platform supports it. + ZeUSMImport.setZeUSMImport(this); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urPlatformGet( + uint32_t NumEntries, ///< [in] the number of platforms to be added to phPlatforms. + ///< If phPlatforms is not NULL, then NumEntries should be greater than + ///< zero, otherwise ::UR_RESULT_ERROR_INVALID_SIZE, + ///< will be returned. + ur_platform_handle_t* phPlatforms, ///< [out][optional][range(0, NumEntries)] array of handle of platforms. + ///< If NumEntries is less than the number of platforms available, then + ///< ::urPlatformGet shall only retrieve that number of platforms. + uint32_t* pNumPlatforms ///< [out][optional] returns the total number of platforms available. + ) +{ + static std::once_flag ZeCallCountInitialized; + try { + std::call_once(ZeCallCountInitialized, []() { + if (ZeDebug & ZE_DEBUG_CALL_COUNT) { + ZeCallCount = new std::map; + } + }); + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + + if (NumEntries == 0 && phPlatforms != nullptr) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + if (phPlatforms == nullptr && pNumPlatforms == nullptr) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + // Setting these environment variables before running zeInit will enable the + // validation layer in the Level Zero loader. + if (ZeDebug & ZE_DEBUG_VALIDATION) { + setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1"); + setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1"); + } + + // Enable SYSMAN support for obtaining the PCI address + // and maximum memory bandwidth. + if (getenv("SYCL_ENABLE_PCI") != nullptr) { + setEnvVar("ZES_ENABLE_SYSMAN", "1"); + } + + // TODO: We can still safely recover if something goes wrong during the init. + // Implement handling segfault using sigaction. + + // We must only initialize the driver once, even if piPlatformsGet() is called + // multiple times. Declaring the return value as "static" ensures it's only + // called once. + static ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit, (0)); + + // Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms. + if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) { + PI_ASSERT(NumEntries != 0, UR_RESULT_ERROR_INVALID_VALUE); + *pNumPlatforms = 0; + return UR_RESULT_SUCCESS; + } + + if (ZeResult != ZE_RESULT_SUCCESS) { + zePrint("zeInit: Level Zero initialization failure\n"); + return ze2urResult(ZeResult); + } + + // Cache pi_platforms for reuse in the future + // It solves two problems; + // 1. sycl::platform equality issue; we always return the same pi_platform. + // 2. performance; we can save time by immediately return from cache. + // + + const std::lock_guard Lock{*PiPlatformsCacheMutex}; + if (!PiPlatformCachePopulated) { + try { + // Level Zero does not have concept of Platforms, but Level Zero driver is + // the closest match. + uint32_t ZeDriverCount = 0; + ZE_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); + if (ZeDriverCount == 0) { + PiPlatformCachePopulated = true; + } else { + std::vector ZeDrivers; + ZeDrivers.resize(ZeDriverCount); + + ZE_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data())); + for (uint32_t I = 0; I < ZeDriverCount; ++I) { + auto Platform = new ur_platform_handle_t_(ZeDrivers[I]); + // Save a copy in the cache for future uses. + PiPlatformsCache->push_back(Platform); + + ur_result_t Result = Platform->initialize(); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + } + PiPlatformCachePopulated = true; + } + } catch (const std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } + + // Populate returned platforms from the cache. + if (phPlatforms) { + PI_ASSERT(NumEntries <= PiPlatformsCache->size(), + UR_RESULT_ERROR_INVALID_PLATFORM); + std::copy_n(PiPlatformsCache->begin(), NumEntries, phPlatforms); + } + + if (pNumPlatforms) + *pNumPlatforms = PiPlatformsCache->size(); + + return UR_RESULT_SUCCESS; +} diff --git a/sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.hpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp old mode 100755 new mode 100644 similarity index 73% rename from sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.hpp rename to sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp index 9593d475af33a..dbd35259b49e3 --- a/sycl/plugins/unified_runtime/adapters/level_zero/ur_level_zero.hpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.hpp @@ -7,6 +7,7 @@ //===-----------------------------------------------------------------===// #pragma once +#include #include #include #include @@ -14,10 +15,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include // Returns the ze_structure_type_t to use in .stype of a structured descriptor. // Intentionally not defined; will give an error if no proper specialization @@ -80,40 +81,40 @@ class ZeCall { }; // Map Level Zero runtime error code to UR error code. -static zer_result_t ze2urResult(ze_result_t ZeResult) { - static std::unordered_map ErrorMapping = { - {ZE_RESULT_SUCCESS, ZER_RESULT_SUCCESS}, - {ZE_RESULT_ERROR_DEVICE_LOST, ZER_RESULT_ERROR_DEVICE_LOST}, - {ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, ZER_RESULT_INVALID_OPERATION}, - {ZE_RESULT_ERROR_NOT_AVAILABLE, ZER_RESULT_INVALID_OPERATION}, - {ZE_RESULT_ERROR_UNINITIALIZED, ZER_RESULT_INVALID_PLATFORM}, - {ZE_RESULT_ERROR_INVALID_ARGUMENT, ZER_RESULT_ERROR_INVALID_ARGUMENT}, - {ZE_RESULT_ERROR_INVALID_NULL_POINTER, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_INVALID_SIZE, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_SIZE, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, ZER_RESULT_INVALID_VALUE}, +static ur_result_t ze2urResult(ze_result_t ZeResult) { + static std::unordered_map ErrorMapping = { + {ZE_RESULT_SUCCESS, UR_RESULT_SUCCESS}, + {ZE_RESULT_ERROR_DEVICE_LOST, UR_RESULT_ERROR_DEVICE_LOST}, + {ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, UR_RESULT_ERROR_INVALID_OPERATION}, + {ZE_RESULT_ERROR_NOT_AVAILABLE, UR_RESULT_ERROR_INVALID_OPERATION}, + {ZE_RESULT_ERROR_UNINITIALIZED, UR_RESULT_ERROR_INVALID_PLATFORM}, + {ZE_RESULT_ERROR_INVALID_ARGUMENT, UR_RESULT_ERROR_INVALID_ARGUMENT}, + {ZE_RESULT_ERROR_INVALID_NULL_POINTER, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_INVALID_SIZE, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_SIZE, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT, UR_RESULT_ERROR_INVALID_VALUE}, {ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT, - ZER_RESULT_INVALID_EVENT}, - {ZE_RESULT_ERROR_INVALID_ENUMERATION, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, ZER_RESULT_INVALID_VALUE}, - {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, ZER_RESULT_INVALID_BINARY}, - {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, ZER_RESULT_INVALID_KERNEL_NAME}, + UR_RESULT_ERROR_INVALID_EVENT}, + {ZE_RESULT_ERROR_INVALID_ENUMERATION, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT, UR_RESULT_ERROR_INVALID_VALUE}, + {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, UR_RESULT_ERROR_INVALID_BINARY}, + {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, UR_RESULT_ERROR_INVALID_KERNEL_NAME}, {ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, - ZER_RESULT_ERROR_INVALID_FUNCTION_NAME}, - {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, ZER_RESULT_INVALID_OPERATION}, + UR_RESULT_ERROR_INVALID_FUNCTION_NAME}, + {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, UR_RESULT_ERROR_INVALID_OPERATION}, {ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, - ZER_RESULT_INVALID_WORK_GROUP_SIZE}, + UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE}, {ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, - ZER_RESULT_ERROR_MODULE_BUILD_FAILURE}, + UR_RESULT_ERROR_MODULE_BUILD_FAILURE}, {ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, - ZER_RESULT_ERROR_OUT_OF_DEVICE_MEMORY}, + UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY}, {ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, - ZER_RESULT_ERROR_OUT_OF_HOST_MEMORY}}; + UR_RESULT_ERROR_OUT_OF_HOST_MEMORY}}; auto It = ErrorMapping.find(ZeResult); if (It == ErrorMapping.end()) { - return ZER_RESULT_ERROR_UNKNOWN; + return UR_RESULT_ERROR_UNKNOWN; } return It->second; } @@ -143,10 +144,10 @@ bool setEnvVar(const char *name, const char *value); #define ZE_CALL_NOCHECK(ZeName, ZeArgs) \ ZeCall().doCall(ZeName ZeArgs, #ZeName, #ZeArgs, false) -struct _ur_level_zero_platform : public _ur_platform { - _ur_level_zero_platform(ze_driver_handle_t Driver) : ZeDriver{Driver} {} +struct ur_adapter_platform_handle_t_ : public _ur_platform { + ur_adapter_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver} {} // Performs initialization of a newly constructed PI platform. - zer_result_t initialize(); + ur_result_t initialize(); // Level Zero lacks the notion of a platform, but there is a driver, which is // a pretty good fit to keep here. @@ -165,7 +166,7 @@ struct _ur_level_zero_platform : public _ur_platform { bool ZeDriverModuleProgramExtensionFound{false}; }; -using ur_level_zero_platform = _ur_level_zero_platform *; +using ur_adapter_platform_handle_t = ur_adapter_platform_handle_t_*; class ZeUSMImportExtension { // Pointers to functions that import/release host memory into USM @@ -179,7 +180,7 @@ class ZeUSMImportExtension { ZeUSMImportExtension() : Enabled{false} {} - void setZeUSMImport(ur_level_zero_platform Platform) { + void setZeUSMImport(ur_adapter_platform_handle_t Platform) { // Whether env var SYCL_USM_HOSTPTR_IMPORT has been set requesting // host ptr import during buffer creation. const char *USMHostPtrImportStr = std::getenv("SYCL_USM_HOSTPTR_IMPORT"); @@ -215,4 +216,8 @@ class ZeUSMImportExtension { } }; +// Helper wrapper for working with USM import extension in Level Zero. extern ZeUSMImportExtension ZeUSMImport; + +// This will count the calls to Level-Zero +extern std::map *ZeCallCount; diff --git a/sycl/plugins/unified_runtime/ur/ur.cpp b/sycl/plugins/unified_runtime/ur/ur.cpp new file mode 100644 index 0000000000000..49ebe9ef0d03e --- /dev/null +++ b/sycl/plugins/unified_runtime/ur/ur.cpp @@ -0,0 +1,26 @@ + +//===--------- ur.hpp - Unified Runtime -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------===// + +#include "ur.hpp" + +// Controls tracing UR calls from within the UR itself. +bool PrintTrace = [] { + const char *Trace = std::getenv("SYCL_PI_TRACE"); + const int TraceValue = Trace ? std::stoi(Trace) : 0; + if (TraceValue == -1 || TraceValue == 2) { // Means print all traces + return true; + } + return false; +}(); + +// Apparatus for maintaining immutable cache of platforms. +std::vector *PiPlatformsCache = + new std::vector; +SpinLock *PiPlatformsCacheMutex = new SpinLock; +bool PiPlatformCachePopulated = false; diff --git a/sycl/plugins/unified_runtime/ur.hpp b/sycl/plugins/unified_runtime/ur/ur.hpp old mode 100755 new mode 100644 similarity index 54% rename from sycl/plugins/unified_runtime/ur.hpp rename to sycl/plugins/unified_runtime/ur/ur.hpp index 63b4a54bb1ffd..c51dbf6c396e0 --- a/sycl/plugins/unified_runtime/ur.hpp +++ b/sycl/plugins/unified_runtime/ur/ur.hpp @@ -7,11 +7,14 @@ //===-----------------------------------------------------------------===// #pragma once +#include #include #include #include +#include +#include -#include +#include // Terminates the process with a catastrophic error message. [[noreturn]] inline void die(const char *Message) { @@ -32,15 +35,14 @@ static const bool SingleThreadMode = [] { // If SingleThreadMode variable is set then mutex operations are turned into // nop. class pi_shared_mutex { - std::shared_mutex Mutex; + std::shared_mutex Mutex; + public: void lock() { if (!SingleThreadMode) Mutex.lock(); } - bool try_lock() { - return SingleThreadMode ? true : Mutex.try_lock(); - } + bool try_lock() { return SingleThreadMode ? true : Mutex.try_lock(); } void unlock() { if (!SingleThreadMode) Mutex.unlock(); @@ -64,6 +66,7 @@ class pi_shared_mutex { // nop. class pi_mutex { std::mutex Mutex; + public: void lock() { if (!SingleThreadMode) @@ -76,5 +79,43 @@ class pi_mutex { } }; +/// SpinLock is a synchronization primitive, that uses atomic variable and +/// causes thread trying acquire lock wait in loop while repeatedly check if +/// the lock is available. +/// +/// One important feature of this implementation is that std::atomic can +/// be zero-initialized. This allows SpinLock to have trivial constructor and +/// destructor, which makes it possible to use it in global context (unlike +/// std::mutex, that doesn't provide such guarantees). +class SpinLock { +public: + void lock() { + while (MLock.test_and_set(std::memory_order_acquire)) + std::this_thread::yield(); + } + void unlock() { MLock.clear(std::memory_order_release); } + +private: + std::atomic_flag MLock = ATOMIC_FLAG_INIT; +}; + +// Helper for one-liner validation +#define PI_ASSERT(condition, error) \ + if (!(condition)) \ + return error; + // TODO: populate with target agnostic handling of UR platforms struct _ur_platform {}; + +// Controls tracing UR calls from within the UR itself. +extern bool PrintTrace; + +// Apparatus for maintaining immutable cache of platforms. +// +// Note we only create a simple pointer variables such that C++ RT won't +// deallocate them automatically at the end of the main program. +// The heap memory allocated for these global variables reclaimed only at +// explicit tear-down. +extern std::vector *PiPlatformsCache; +extern SpinLock *PiPlatformsCacheMutex; +extern bool PiPlatformCachePopulated; diff --git a/sycl/plugins/unified_runtime/ur_bindings.hpp b/sycl/plugins/unified_runtime/ur_bindings.hpp new file mode 100644 index 0000000000000..a3c97f90434db --- /dev/null +++ b/sycl/plugins/unified_runtime/ur_bindings.hpp @@ -0,0 +1,17 @@ +//===------ ur_bindings.hpp - Complete definitions of UR handles -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===------------------------------------------------------------------===// +#pragma once + +#include +#include + +// Make the Unified Runtime handles definition complete. +// This is used in various "create" API where new handles are allocated. +struct ur_platform_handle_t_ : public ur_adapter_platform_handle_t_ { + using ur_adapter_platform_handle_t_::ur_adapter_platform_handle_t_; +}; diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 8e9d9e94a3223..a0b9451769409 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -37,6 +37,8 @@ static const plugin &getPlugin(backend Backend) { return pi::getPlugin(); case backend::ext_oneapi_cuda: return pi::getPlugin(); + case backend::unified_runtime: + return pi::getPlugin(); default: throw sycl::runtime_error{"Unsupported backend", PI_ERROR_INVALID_OPERATION}; diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index bd32be2988280..361af24496383 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -172,14 +172,15 @@ getSyclDeviceTypeMap() { // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -const std::array, 7> &getSyclBeMap() { - static const std::array, 7> SyclBeMap = { +const std::array, 8> &getSyclBeMap() { + static const std::array, 8> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::ext_oneapi_level_zero}, {"cuda", backend::ext_oneapi_cuda}, {"hip", backend::ext_oneapi_hip}, {"esimd_emulator", backend::ext_intel_esimd_emulator}, + {"unified_runtime", backend::unified_runtime}, {"*", backend::all}}}; return SyclBeMap; } diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index e825812996a87..7322b8cfab643 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -237,7 +237,7 @@ getSyclDeviceTypeMap(); // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST and // ONEAPI_DEVICE_SELECTOR -const std::array, 7> &getSyclBeMap(); +const std::array, 8> &getSyclBeMap(); // --------------------------------------- // ONEAPI_DEVICE_SELECTOR support diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index fb228cd85979a..c2699ca365bde 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -293,6 +293,8 @@ std::vector> findPlugins() { backend::ext_oneapi_level_zero); PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::ext_oneapi_cuda); PluginNames.emplace_back(__SYCL_HIP_PLUGIN_NAME, backend::ext_oneapi_hip); + PluginNames.emplace_back(__SYCL_UNFIED_RUNTIME_PLUGIN_NAME, + backend::unified_runtime); } else if (FilterList) { std::vector Filters = FilterList->get(); bool OpenCLFound = false; @@ -300,6 +302,7 @@ std::vector> findPlugins() { bool CudaFound = false; bool EsimdCpuFound = false; bool HIPFound = false; + bool UnifiedRuntimeFound = false; for (const device_filter &Filter : Filters) { backend Backend = Filter.Backend ? Filter.Backend.value() : backend::all; if (!OpenCLFound && @@ -330,6 +333,12 @@ std::vector> findPlugins() { backend::ext_oneapi_hip); HIPFound = true; } + if (!UnifiedRuntimeFound && + (Backend == backend::unified_runtime || Backend == backend::all)) { + PluginNames.emplace_back(__SYCL_UNFIED_RUNTIME_PLUGIN_NAME, + backend::unified_runtime); + UnifiedRuntimeFound = true; + } } } else { ods_target_list &list = *OdsTargetList; @@ -351,6 +360,10 @@ std::vector> findPlugins() { if (list.backendCompatible(backend::ext_oneapi_hip)) { PluginNames.emplace_back(__SYCL_HIP_PLUGIN_NAME, backend::ext_oneapi_hip); } + if (list.backendCompatible(backend::unified_runtime)) { + PluginNames.emplace_back(__SYCL_UNFIED_RUNTIME_PLUGIN_NAME, + backend::unified_runtime); + } } return PluginNames; } @@ -528,6 +541,7 @@ getPlugin(); template __SYCL_EXPORT const plugin & getPlugin(); template __SYCL_EXPORT const plugin &getPlugin(); +template __SYCL_EXPORT const plugin &getPlugin(); // Report error and no return (keeps compiler from printing warnings). // TODO: Probably change that to throw a catchable exception, diff --git a/sycl/test/basic_tests/interop-backend-traits.cpp b/sycl/test/basic_tests/interop-backend-traits.cpp index 6414e218415c9..13ba493af05af 100644 --- a/sycl/test/basic_tests/interop-backend-traits.cpp +++ b/sycl/test/basic_tests/interop-backend-traits.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx -fsycl -DUSE_OPENCL %s -// RUN: %clangxx -fsycl -DUSE_L0 %s +// RUN: %clangxx %fsycl-host-only -DUSE_L0 %s // RUN: %clangxx -fsycl -DUSE_CUDA %s // RUN: %clangxx -fsycl -DUSE_HIP %s // RUN: %clangxx -fsycl -DUSE_CUDA_EXPERIMENTAL %s @@ -13,7 +13,7 @@ constexpr auto Backend = sycl::backend::opencl; #endif #ifdef USE_L0 -#include +#include #include diff --git a/sycl/test/basic_tests/interop-level-zero-2020.cpp b/sycl/test/basic_tests/interop-level-zero-2020.cpp index 6274786bea46b..e73133850c823 100644 --- a/sycl/test/basic_tests/interop-level-zero-2020.cpp +++ b/sycl/test/basic_tests/interop-level-zero-2020.cpp @@ -4,7 +4,7 @@ // Test for SYCL-2020 Level Zero interop API // clang-format off -#include +#include #include // clang-format on diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index cde5d4e90b319..c5d217838f1f0 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -80,13 +80,14 @@ config.substitutions.append( ('%sycl_include', config.sycl_include ) ) config.substitutions.append( ('%sycl_source_dir', config.sycl_source_dir) ) config.substitutions.append( ('%opencl_libs_dir', config.opencl_libs_dir) ) +config.substitutions.append( ('%level_zero_include_dir', config.level_zero_include_dir) ) config.substitutions.append( ('%opencl_include_dir', config.opencl_include_dir) ) config.substitutions.append( ('%cuda_toolkit_include', config.cuda_toolkit_include) ) config.substitutions.append( ('%sycl_tools_src_dir', config.sycl_tools_src_dir ) ) config.substitutions.append( ('%llvm_build_lib_dir', config.llvm_build_lib_dir ) ) config.substitutions.append( ('%llvm_build_bin_dir', config.llvm_build_bin_dir ) ) -config.substitutions.append( ('%fsycl-host-only', '-std=c++17 -Xclang -fsycl-is-host -isystem %s -isystem %s -isystem %s' % (config.sycl_include, config.opencl_include_dir, config.sycl_include + '/sycl/') ) ) +config.substitutions.append( ('%fsycl-host-only', '-std=c++17 -Xclang -fsycl-is-host -isystem %s -isystem %s -isystem %s -isystem %s' % (config.sycl_include, config.level_zero_include_dir, config.opencl_include_dir, config.sycl_include + '/sycl/') ) ) config.substitutions.append( ('%sycl_lib', ' -lsycl6' if platform.system() == "Windows" else '-lsycl') ) llvm_config.add_tool_substitutions(['llvm-spirv'], [config.sycl_tools_dir]) diff --git a/sycl/test/lit.site.cfg.py.in b/sycl/test/lit.site.cfg.py.in index 410bd18d87789..d5a6317e2e23c 100644 --- a/sycl/test/lit.site.cfg.py.in +++ b/sycl/test/lit.site.cfg.py.in @@ -12,6 +12,7 @@ config.sycl_libs_dir = lit_config.params.get('SYCL_LIBS_DIR', "@LLVM_LIBS_DIR@") config.target_triple = "@LLVM_TARGET_TRIPLE@" config.host_triple = "@LLVM_HOST_TRIPLE@" config.opencl_libs_dir = os.path.dirname("@OpenCL_LIBRARIES@") +config.level_zero_include_dir = "@LEVEL_ZERO_INCLUDE_DIR@" config.opencl_include_dir = "@OpenCL_INCLUDE_DIR@" config.cuda_toolkit_include = "@CUDA_TOOLKIT_INCLUDE@" config.sycl_tools_src_dir = "@SYCL_TOOLS_SRC_DIR@" diff --git a/sycl/tools/sycl-trace/CMakeLists.txt b/sycl/tools/sycl-trace/CMakeLists.txt index 6f36b83a29391..8859c42a0ba8d 100644 --- a/sycl/tools/sycl-trace/CMakeLists.txt +++ b/sycl/tools/sycl-trace/CMakeLists.txt @@ -45,13 +45,13 @@ add_custom_target(pi-pretty-printers if ("level_zero" IN_LIST SYCL_ENABLE_PLUGINS) add_dependencies(sycl_pi_trace_collector pi_level_zero) - target_link_libraries(sycl_pi_trace_collector PRIVATE LevelZeroLoader::Headers) + target_link_libraries(sycl_pi_trace_collector PRIVATE LevelZeroLoader-Headers) target_compile_definitions(sycl_pi_trace_collector PRIVATE SYCL_HAS_LEVEL_ZERO) add_custom_target(ze-pretty-printers COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/generate_ze_pretty_printers.py - ${SYCL_INCLUDE_BUILD_DIR}/sycl/level_zero/ze_api.h + ${LEVEL_ZERO_INCLUDE_DIR}/ze_api.h DEPENDS pi_level_zero BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/ze_printers.def diff --git a/sycl/tools/sycl-trace/ze_trace_collector.cpp b/sycl/tools/sycl-trace/ze_trace_collector.cpp index d79e48e2874ab..fb2bf2c3d2a0e 100644 --- a/sycl/tools/sycl-trace/ze_trace_collector.cpp +++ b/sycl/tools/sycl-trace/ze_trace_collector.cpp @@ -13,7 +13,7 @@ #include -#include +#include #include #include