From ac6a5f5face6155380637dbd75327b5d9a25ff1b Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Thu, 26 Sep 2024 16:30:39 +0000 Subject: [PATCH] Updating level0 support, still need to investigate task dependency issue that just popped up. --- src/apex/CMakeLists_standalone.cmake | 1 + src/apex/L0/utils.h | 33 +++++++++++++++++++++++++++- src/apex/L0/ze_kernel_collector.h | 14 ++++++++++++ src/apex/L0/ze_utils.h | 14 ++++++++++-- src/apex/apex.cpp | 29 +++++++++++++++++------- 5 files changed, 80 insertions(+), 11 deletions(-) diff --git a/src/apex/CMakeLists_standalone.cmake b/src/apex/CMakeLists_standalone.cmake index 20285b56..f8fac7e2 100644 --- a/src/apex/CMakeLists_standalone.cmake +++ b/src/apex/CMakeLists_standalone.cmake @@ -206,6 +206,7 @@ endif (APEX_WITH_HIP) if (APEX_WITH_LEVEL0) SET(LEVEL0_SOURCE apex_level0.cpp) add_definitions(-DAPEX_WITH_LEVEL0) + add_definitions(-DPTI_LEVEL_ZERO=1) add_library (apex_level0 ${LEVEL0_SOURCE}) target_link_libraries (apex_level0 apex ${LIBS} diff --git a/src/apex/L0/utils.h b/src/apex/L0/utils.h index 4b6c3787..837c51a4 100644 --- a/src/apex/L0/utils.h +++ b/src/apex/L0/utils.h @@ -21,6 +21,12 @@ #include "pti_assert.h" +#ifdef _WIN32 +#define PTI_EXPORT __declspec(dllexport) +#else +#define PTI_EXPORT __attribute__ ((visibility ("default"))) +#endif + #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) @@ -35,6 +41,18 @@ namespace utils { +struct DeviceUUID { + uint16_t vendorID; + uint16_t deviceID; + uint16_t revisionID; + uint16_t pciDomain; + uint8_t pciBus; + uint8_t pciDevice; + uint8_t pciFunction; + uint8_t reserved[4]; + uint8_t subDeviceId; +}; + struct Comparator { template <typename T> bool operator()(const T& left, const T& right) const { @@ -45,6 +63,19 @@ namespace utils { } }; +template <typename T> +struct ComparatorPciAddress { + bool operator()(const T& left, const T& right) const { + if (left.BusNumber != right.BusNumber) { + return (left.BusNumber < right.BusNumber); + } + if (left.DeviceNumber != right.DeviceNumber) { 
+ return (left.DeviceNumber < right.DeviceNumber); + } + return left.FunctionNumber < right.FunctionNumber; + } +}; + #if defined(__gnu_linux__) inline uint64_t GetTime(clockid_t id) { @@ -167,7 +198,7 @@ namespace utils { return GetCurrentThreadId(); #else #ifdef SYS_gettid - return syscall(SYS_gettid); + return (uint32_t)syscall(SYS_gettid); #else #error "SYS_gettid is unavailable on this system" #endif diff --git a/src/apex/L0/ze_kernel_collector.h b/src/apex/L0/ze_kernel_collector.h index 48991e14..c36e02b9 100644 --- a/src/apex/L0/ze_kernel_collector.h +++ b/src/apex/L0/ze_kernel_collector.h @@ -190,6 +190,7 @@ class ZeKernelCollector { prologue_callbacks.EventPool.pfnCreateCb = OnEnterEventPoolCreate; epilogue_callbacks.EventPool.pfnCreateCb = OnExitEventPoolCreate; + epilogue_callbacks.Event.pfnHostSynchronizeCb = OnExitEventHostSynchronize; prologue_callbacks.CommandList.pfnAppendLaunchKernelCb = OnEnterCommandListAppendLaunchKernel; @@ -496,6 +497,19 @@ class ZeKernelCollector { } } + static void OnExitEventHostSynchronize(ze_event_host_synchronize_params_t *params, + ze_result_t result, + void *global_data, + void **instance_data) { + if (*(params->phEvent) != nullptr) { + ZeKernelCollector* collector = + reinterpret_cast<ZeKernelCollector*>(global_data); + PTI_ASSERT(collector != nullptr); + collector->ProcessCall(*(params->phEvent)); + collector->ProcessCalls(); + } + } + static void CreateEvent(ze_context_handle_t context, ze_event_pool_handle_t& event_pool, ze_event_handle_t& event) { diff --git a/src/apex/L0/ze_utils.h b/src/apex/L0/ze_utils.h index 4615592b..c5a25ab0 100644 --- a/src/apex/L0/ze_utils.h +++ b/src/apex/L0/ze_utils.h @@ -355,7 +355,9 @@ namespace utils { ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr}; ze_result_t status = zeDeviceGetProperties(device, &props); PTI_ASSERT(status == ZE_RESULT_SUCCESS); - return (1ull << props.kernelTimestampValidBits) - 1ull; + //return (1ull << props.kernelTimestampValidBits) - 1ull; + 
return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits<uint64_t>::max() + : ((1ull << props.kernelTimestampValidBits) - 1ull)); } inline uint64_t GetMetricTimestampMask(ze_device_handle_t device) { @@ -365,7 +367,15 @@ namespace utils { ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr}; ze_result_t status = zeDeviceGetProperties(device, &props); PTI_ASSERT(status == ZE_RESULT_SUCCESS); - return (1ull << props.kernelTimestampValidBits) - 1ull; + //return (1ull << props.kernelTimestampValidBits) - 1ull; + uint32_t devicemask = (props.deviceId & 0xFF00); + if ((devicemask == 0x5600) || (devicemask == 0x4F00) || (devicemask == 0x0B00)) { + return (1ull << (props.kernelTimestampValidBits - 1)) - 1ull; + } + else { + return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits<uint64_t>::max() + : ((1ull << props.kernelTimestampValidBits) - 1ull)); + } #endif } diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index f3066179..80135e28 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -249,11 +249,8 @@ void apex::_initialize() tmp << " (Debug)"; #endif tmp << "\nC++ Language Standard version : " << __cplusplus; -#if defined(__clang__) - /* Clang/LLVM. ---------------------------------------------- */ - tmp << "\nClang Compiler version : " << __VERSION__; -#elif defined(__ICC) || defined(__INTEL_COMPILER) - /* Intel ICC/ICPC. ------------------------------------------ */ +#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(__INTEL_CLANG_COMPILER) || defined(__INTEL_LLVM_COMPILER) + /* Intel ICC/ICPC/ICX/ICPX. --------------------------------- */ tmp << "\nIntel Compiler version : " << __VERSION__; #elif defined(__GNUC__) || defined(__GNUG__) /* GNU GCC/G++. --------------------------------------------- */ @@ -273,6 +270,9 @@ void apex::_initialize() #elif defined(__SUNPRO_CC) /* Oracle Solaris Studio. 
----------------------------------- */ tmp << "\nOracle Compiler version : " << __SUNPRO_CC; +#elif defined(__clang__) + /* Clang/LLVM. ---------------------------------------------- */ + tmp << "\nClang Compiler version : " << __VERSION__; #endif tmp << "\nConfigured features: Pthread"; #if defined(APEX_WITH_ACTIVEHARMONY) || defined(APEX_HAVE_ACTIVEHARMONY) @@ -609,9 +609,22 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, unsetenv("LD_PRELOAD"); } if (comm_rank == 0) { - printf("%s", apex_banner); - printf("APEX Version: %s\n", instance->version_string.c_str()); - printf("Executing command line: %s\n", getCommandLine().c_str()); + //printf("%s", apex_banner); + //printf("APEX Version: %s\n", instance->version_string.c_str()); + //printf("Executing command line: %s\n", getCommandLine().c_str()); + std::stringstream ss; + //ss << apex_banner << "\n"; + ss << " ___ ______ _______ __\n"; + ss << " / _ \\ | ___ \\ ___\\ \\ / /\n"; + ss << "/ /_\\ \\| |_/ / |__ \\ V /\n"; + ss << "| _ || __/| __| / \\\n"; + ss << "| | | || | | |___/ /^\\ \\\n"; + ss << "\\_| |_/\\_| \\____/\\/ \\/\n"; + ss << "APEX Version: " << instance->version_string << "\n"; + ss << "Executing command line: " << getCommandLine() << "\n" << std::endl; + std::string tmp{ss.str()}; + fputs(tmp.c_str(), stdout); + } FUNCTION_EXIT return APEX_NOERROR;