Skip to content

Commit

Permalink
move more stuff and build true pi_unified_runtime plugin
Browse files Browse the repository at this point in the history
Signed-off-by: Sergey V Maslov <[email protected]>
  • Loading branch information
smaslov-intel committed Dec 2, 2022
1 parent 461327b commit d3f344a
Show file tree
Hide file tree
Showing 10 changed files with 341 additions and 238 deletions.
3 changes: 2 additions & 1 deletion sycl/plugins/level_zero/CMakeLists.txt
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ else()
set(LEVEL_ZERO_LOADER_TAG v1.8.8)
endif()

# Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104
set(CMAKE_INCLUDE_CURRENT_DIR OFF)

message(STATUS "Will fetch Level Zero Loader from ${LEVEL_ZERO_LOADER_REPO}")
Expand Down Expand Up @@ -57,7 +58,7 @@ add_sycl_plugin(level_zero
UnifiedRuntime-Headers
ze_loader
Threads::Threads
pi_unified_runtime
unified_runtime_static
)

find_package(Python3 REQUIRED)
Expand Down
206 changes: 6 additions & 200 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,18 +90,12 @@ static const bool ReuseDiscardedEvents = [] {
return std::stoi(ReuseDiscardedEventsFlag) > 0;
}();

// Controls PI level tracing prints.
static bool PrintPiTrace = false;

// Controls support of the indirect access kernels and deferred memory release.
static const bool IndirectAccessTrackingEnabled = [] {
return std::getenv("SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY") !=
nullptr;
}();

// This will count the calls to Level-Zero
static std::map<const char *, int> *ZeCallCount = nullptr;

// Map from L0 to PI result
static inline pi_result mapError(ze_result_t Result) {
return ur2piResult(ze2urResult(Result));
Expand All @@ -118,7 +112,7 @@ static inline pi_result mapError(ze_result_t Result) {
// Trace an internal PI call; returns in case of an error.
#define PI_CALL(Call) \
{ \
if (PrintPiTrace) \
if (PrintTrace) \
fprintf(stderr, "PI ---> %s\n", #Call); \
pi_result Result = (Call); \
if (Result != PI_SUCCESS) \
Expand Down Expand Up @@ -352,15 +346,6 @@ static bool CopyEngineRequested(pi_device Device) {
}

// Global variables used in PI_Level_Zero
// Note we only create a simple pointer variables such that C++ RT won't
// deallocate them automatically at the end of the main program.
// The heap memory allocated for these global variables reclaimed only when
// Sycl RT calls piTearDown().
static std::vector<pi_platform> *PiPlatformsCache =
new std::vector<pi_platform>;
static sycl::detail::SpinLock *PiPlatformsCacheMutex =
new sycl::detail::SpinLock;
static bool PiPlatformCachePopulated = false;

pi_result
_pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
Expand Down Expand Up @@ -470,55 +455,6 @@ static pi_result enqueueMemCopyRectHelper(
const pi_event *EventWaitList, pi_event *Event,
bool PreferCopyEngine = false);

inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
switch (ZeError) {
#define ZE_ERRCASE(ERR) \
case ERR: \
ErrorString = "" #ERR; \
break;

ZE_ERRCASE(ZE_RESULT_SUCCESS)
ZE_ERRCASE(ZE_RESULT_NOT_READY)
ZE_ERRCASE(ZE_RESULT_ERROR_DEVICE_LOST)
ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY)
ZE_ERRCASE(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY)
ZE_ERRCASE(ZE_RESULT_ERROR_MODULE_BUILD_FAILURE)
ZE_ERRCASE(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS)
ZE_ERRCASE(ZE_RESULT_ERROR_NOT_AVAILABLE)
ZE_ERRCASE(ZE_RESULT_ERROR_UNINITIALIZED)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_VERSION)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ARGUMENT)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_HANDLE)
ZE_ERRCASE(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NULL_POINTER)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SIZE)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_SIZE)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ALIGNMENT)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_ENUMERATION)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_ENUMERATION)
ZE_ERRCASE(ZE_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_NATIVE_BINARY)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_NAME)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_NAME)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_FUNCTION_NAME)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_GLOBAL_WIDTH_DIMENSION)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_KERNEL_ATTRIBUTE_VALUE)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE)
ZE_ERRCASE(ZE_RESULT_ERROR_OVERLAPPING_REGIONS)
ZE_ERRCASE(ZE_RESULT_ERROR_INVALID_MODULE_UNLINKED)
ZE_ERRCASE(ZE_RESULT_ERROR_UNKNOWN)

#undef ZE_ERRCASE
default:
assert(false && "Unexpected Error code");
} // switch
}

// Global variables for PI_ERROR_PLUGIN_SPECIFIC_ERROR
constexpr size_t MaxMessageSize = 256;
thread_local pi_result ErrorMessageCode = PI_SUCCESS;
Expand All @@ -538,26 +474,6 @@ pi_result piPluginGetLastError(char **message) {
return ErrorMessageCode;
}

ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName,
const char *ZeArgs, bool TraceError) {
zePrint("ZE ---> %s%s\n", ZeName, ZeArgs);

if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
++(*ZeCallCount)[ZeName];
}

if (ZeResult && TraceError) {
const char *ErrorString = "Unknown";
zeParseError(ZeResult, ErrorString);
zePrint("Error (%s) in %s\n", ErrorString, ZeName);
}
return ZeResult;
}

#define PI_ASSERT(condition, error) \
if (!(condition)) \
return error;

bool _pi_queue::doReuseDiscardedEvents() {
return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents();
}
Expand Down Expand Up @@ -1580,7 +1496,7 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
// traces incurs much different timings than real execution
// ansyway, and many regression tests use it.
//
bool CurrentlyEmpty = !PrintPiTrace && this->LastCommandEvent == nullptr;
bool CurrentlyEmpty = !PrintTrace && this->LastCommandEvent == nullptr;

// The list can be empty if command-list only contains signals of proxy
// events. It is possible that executeCommandList is called twice for the same
Expand Down Expand Up @@ -2238,117 +2154,7 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule,

pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
pi_uint32 *NumPlatforms) {

static const char *PiTrace = std::getenv("SYCL_PI_TRACE");
static const int PiTraceValue = PiTrace ? std::stoi(PiTrace) : 0;
if (PiTraceValue == -1 || PiTraceValue == 2) { // Means print all PI traces
PrintPiTrace = true;
}

static std::once_flag ZeCallCountInitialized;
try {
std::call_once(ZeCallCountInitialized, []() {
if (ZeDebug & ZE_DEBUG_CALL_COUNT) {
ZeCallCount = new std::map<const char *, int>;
}
});
} catch (const std::bad_alloc &) {
return PI_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return PI_ERROR_UNKNOWN;
}

if (NumEntries == 0 && Platforms != nullptr) {
return PI_ERROR_INVALID_VALUE;
}
if (Platforms == nullptr && NumPlatforms == nullptr) {
return PI_ERROR_INVALID_VALUE;
}

// Setting these environment variables before running zeInit will enable the
// validation layer in the Level Zero loader.
if (ZeDebug & ZE_DEBUG_VALIDATION) {
setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1");
setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1");
}

// Enable SYSMAN support for obtaining the PCI address
// and maximum memory bandwidth.
if (getenv("SYCL_ENABLE_PCI") != nullptr) {
setEnvVar("ZES_ENABLE_SYSMAN", "1");
}

// TODO: We can still safely recover if something goes wrong during the init.
// Implement handling segfault using sigaction.

// We must only initialize the driver once, even if piPlatformsGet() is called
// multiple times. Declaring the return value as "static" ensures it's only
// called once.
static ze_result_t ZeResult = ZE_CALL_NOCHECK(zeInit, (0));

// Absorb the ZE_RESULT_ERROR_UNINITIALIZED and just return 0 Platforms.
if (ZeResult == ZE_RESULT_ERROR_UNINITIALIZED) {
PI_ASSERT(NumPlatforms != 0, PI_ERROR_INVALID_VALUE);
*NumPlatforms = 0;
return PI_SUCCESS;
}

if (ZeResult != ZE_RESULT_SUCCESS) {
zePrint("zeInit: Level Zero initialization failure\n");
return mapError(ZeResult);
}

// Cache pi_platforms for reuse in the future
// It solves two problems;
// 1. sycl::platform equality issue; we always return the same pi_platform.
// 2. performance; we can save time by immediately return from cache.
//

const std::lock_guard<sycl::detail::SpinLock> Lock{*PiPlatformsCacheMutex};
if (!PiPlatformCachePopulated) {
try {
// Level Zero does not have concept of Platforms, but Level Zero driver is
// the closest match.
uint32_t ZeDriverCount = 0;
ZE_CALL(zeDriverGet, (&ZeDriverCount, nullptr));
if (ZeDriverCount == 0) {
PiPlatformCachePopulated = true;
} else {
std::vector<ze_driver_handle_t> ZeDrivers;
ZeDrivers.resize(ZeDriverCount);

ZE_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
pi_platform Platform = new _pi_platform(ZeDrivers[I]);
// Save a copy in the cache for future uses.
PiPlatformsCache->push_back(Platform);

pi_result Result = Platform->initialize();
if (Result != PI_SUCCESS) {
return Result;
}
}
PiPlatformCachePopulated = true;
}
} catch (const std::bad_alloc &) {
return PI_ERROR_OUT_OF_HOST_MEMORY;
} catch (...) {
return PI_ERROR_UNKNOWN;
}
}

// Populate returned platforms from the cache.
if (Platforms) {
PI_ASSERT(NumEntries <= PiPlatformsCache->size(),
PI_ERROR_INVALID_PLATFORM);
std::copy_n(PiPlatformsCache->begin(), NumEntries, Platforms);
}

if (NumPlatforms) {
*NumPlatforms = PiPlatformsCache->size();
}

return PI_SUCCESS;
return pi2ur::piPlatformsGet(NumEntries, Platforms, NumPlatforms);
}

pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName,
Expand Down Expand Up @@ -3368,10 +3174,10 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle,
// TODO: maybe we should populate cache of platforms if it wasn't already.
// For now assert that is was populated.
PI_ASSERT(PiPlatformCachePopulated, PI_ERROR_INVALID_VALUE);
const std::lock_guard<sycl::detail::SpinLock> Lock{*PiPlatformsCacheMutex};
const std::lock_guard<SpinLock> Lock{*PiPlatformsCacheMutex};

pi_device Dev = nullptr;
for (auto &ThePlatform : *PiPlatformsCache) {
for (pi_platform ThePlatform : *PiPlatformsCache) {
Dev = ThePlatform->getDeviceFromNativeHandle(ZeDevice);
if (Dev) {
// Check that the input Platform, if was given, matches the found one.
Expand Down Expand Up @@ -8781,7 +8587,7 @@ pi_result piTearDown(void *PluginParameter) {
(void)PluginParameter;
bool LeakFound = false;
// reclaim pi_platform objects here since we don't have piPlatformRelease.
for (pi_platform &Platform : *PiPlatformsCache) {
for (pi_platform Platform : *PiPlatformsCache) {
delete Platform;
}
delete PiPlatformsCache;
Expand Down
4 changes: 3 additions & 1 deletion sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@
#include <unordered_set>
#include <vector>

#include <sycl/detail/iostream_proxy.hpp>
#include <ze_api.h>
#include <zes_api.h>
#include <sycl/detail/iostream_proxy.hpp>

// Share code between this PI L0 Plugin and UR L0 Adapter
#include <adapters/level_zero/ur_level_zero.hpp>
Expand Down Expand Up @@ -207,6 +207,8 @@ struct _pi_platform : public _ur_level_zero_platform {
pi_shared_mutex ContextsMutex;
};

struct _zer_platform_handle_t : public _pi_platform {};

// Implements memory allocation via L0 RT for USM allocator interface.
class USMMemoryAllocBase : public SystemMemory {
protected:
Expand Down
39 changes: 33 additions & 6 deletions sycl/plugins/unified_runtime/CMakeLists.txt
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,49 @@ list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS unified-runtime)

find_package(Threads REQUIRED)

#
# Build a helper static library to carry pieces shared between
# this Unified Runtime plugin and Level Zero plugin. We cannot
# use dynamic plugin library as that only exports pi* symbols.
#
add_library(unified_runtime_static STATIC
"${sycl_inc_dir}/sycl/detail/pi.h"
"${CMAKE_CURRENT_SOURCE_DIR}/ur.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ur.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/adapters/level_zero/ur_level_zero.cpp"
)

target_include_directories(unified_runtime_static
INTERFACE
"${UNIFIED_RUNTIME_INCLUDE_DIR}"
"${CMAKE_CURRENT_SOURCE_DIR}"
PRIVATE
# For include <sycl/detail/pi.h>
# TODO: how to get rid of this?
"${CMAKE_SOURCE_DIR}/../sycl/include"
)

target_link_libraries(unified_runtime_static PRIVATE
UnifiedRuntime-Headers
LevelZeroLoader-Headers
)

#
# NOTE: the Unified Runtime doesn't have the loader [yet].
# So what we really build is the Unified Runtime with Level Zero Adapter
# together.
#
add_sycl_plugin(unified_runtime
SOURCES
"${sycl_inc_dir}/sycl/detail/pi.h"
"pi2ur.cpp"
"pi2ur.hpp"
"ur.hpp"
"adapters/level_zero/ur_level_zero.hpp"
"adapters/level_zero/ur_level_zero.cpp"
# Put here anything that belongs exclusively to Unified Runtime
# and should not be shared with the Level Zero plugin
"${CMAKE_CURRENT_SOURCE_DIR}/pi2ur.cpp"
INCLUDE_DIRS
"${UNIFIED_RUNTIME_INCLUDE_DIR}"
LIBRARIES
unified_runtime_static
Threads::Threads
UnifiedRuntime-Headers
LevelZeroLoader-Headers
Expand Down
Loading

0 comments on commit d3f344a

Please sign in to comment.