Skip to content

Commit

Permalink
Merge branch 'develop' into multiple-parents
Browse files Browse the repository at this point in the history
  • Loading branch information
khuck committed Oct 1, 2024
2 parents 5fadebb + 4f82053 commit 9720e61
Show file tree
Hide file tree
Showing 18 changed files with 325 additions and 132 deletions.
42 changes: 5 additions & 37 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ set (APEX_VERSION_MINOR 6 CACHE STRING "APEX Minor Version")
set (APEX_VERSION_PATCH 5 CACHE STRING "APEX Patch Version")
set (APEX_HOMEPAGE_URL "https://github.com/UO-OACISS/apex" CACHE STRING "APEX homepage URL")

cmake_policy(VERSION 2.8.12)
# cmake_policy(VERSION 3.20.1) - implicitly called by cmake_minimum_required
if (${CMAKE_MAJOR_VERSION} GREATER 2)
cmake_policy(SET CMP0042 NEW)
if (${CMAKE_MINOR_VERSION} GREATER 11)
Expand Down Expand Up @@ -342,12 +342,7 @@ if (APEX_USE_PEDANTIC)
endif(APEX_WITH_CUDA)
endif (APEX_USE_PEDANTIC)

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
set(APEX_STDCXX_LIB "" CACHE STRING "C++ library for linking")
message("IntelLLVM compiler detected, no stdc++ library needed")
else()
set(APEX_STDCXX_LIB stdc++ CACHE STRING "C++ library for linking")
endif()
set(APEX_STDCXX_LIB stdc++ CACHE STRING "C++ library for linking")

# PGI and Intel don't like the concurrentqueue code.
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
Expand All @@ -365,35 +360,6 @@ set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# add_definitions(-std=c++11)
#include(CheckCXXCompilerFlag)
#CHECK_CXX_COMPILER_FLAG("-std=c++17" COMPILER_SUPPORTS_CXX17)
#if(COMPILER_SUPPORTS_CXX17)
# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++17" CACHE STRING "CXX Support Flag" FORCE)
#else()
# CHECK_CXX_COMPILER_FLAG("-std=c++14" COMPILER_SUPPORTS_CXX14)
# if(COMPILER_SUPPORTS_CXX14)
# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++14" CACHE STRING "CXX Support Flag" FORCE)
# else()
# CHECK_CXX_COMPILER_FLAG("-std=c++1y" COMPILER_SUPPORTS_CXX1Y)
# if(COMPILER_SUPPORTS_CXX1Y)
# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++1y" CACHE STRING "CXX Support Flag" FORCE)
# else()
# CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11)
# if(COMPILER_SUPPORTS_CXX11)
# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++11" CACHE STRING "CXX Support Flag" FORCE)
# else()
# CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X)
# if(COMPILER_SUPPORTS_CXX0X)
# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++0x" CACHE STRING "CXX Support Flag" FORCE)
# else()
# message(FATAL_ERROR " Compiler ${CMAKE_CXX_COMPILER} has no C++11 support.")
# endif()
# endif()
# endif()
# endif()
#endif()

# By the way, GCC lies. It accepts the flag, but doesn't have the support.
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
Expand Down Expand Up @@ -688,7 +654,9 @@ if(APEX_WITH_KOKKOS)
if(APEX_BUILD_TESTS)
# Just for testing
SET(Kokkos_LIBRARY kokkoscore)
set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE)
# Force the Kokkos OpenMP backend on for the test build, except on Apple
# platforms. NOTE(review): presumably skipped there because the default Apple
# toolchain lacks OpenMP support - confirm.
if(NOT APPLE)
  set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE)
endif()
set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE)
set(Kokkos_ARCH_NATIVE ON CACHE BOOL "" FORCE)
set(Kokkos_ENABLE_TUNING ON CACHE BOOL "" FORCE)
Expand Down
1 change: 1 addition & 0 deletions src/apex/CMakeLists_standalone.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ endif (APEX_WITH_HIP)
if (APEX_WITH_LEVEL0)
SET(LEVEL0_SOURCE apex_level0.cpp)
add_definitions(-DAPEX_WITH_LEVEL0)
add_definitions(-DPTI_LEVEL_ZERO=1)
add_library (apex_level0
${LEVEL0_SOURCE})
target_link_libraries (apex_level0 apex ${LIBS}
Expand Down
33 changes: 32 additions & 1 deletion src/apex/L0/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@

#include "pti_assert.h"

// PTI_EXPORT marks a symbol as exported: __declspec(dllexport) when _WIN32 is
// defined, and the GNU-style default-visibility attribute everywhere else.
#if defined(_WIN32)
#  define PTI_EXPORT __declspec(dllexport)
#else
#  define PTI_EXPORT __attribute__ ((visibility ("default")))
#endif

// STRINGIFY turns its argument into a string literal exactly as written
// (macro arguments are NOT expanded first).
#define STRINGIFY(x) #x
// TOSTRING adds one level of indirection so that a macro argument is expanded
// before being stringified, e.g. TOSTRING(__LINE__) yields the line number.
#define TOSTRING(x) STRINGIFY(x)

Expand All @@ -35,6 +41,18 @@

namespace utils {

// Field-by-field description of a device identifier: vendor, device and
// revision ids, a PCI location (domain, bus, device, function), four reserved
// bytes and a sub-device id. The members add up to 16 bytes.
// NOTE(review): presumably meant to overlay the 16-byte Level Zero device
// UUID - confirm against the code that fills in or casts to this struct
// before changing field order or widths.
struct DeviceUUID {
uint16_t vendorID;
uint16_t deviceID;
uint16_t revisionID;
uint16_t pciDomain;
uint8_t pciBus;
uint8_t pciDevice;
uint8_t pciFunction;
// Unused padding bytes; keeps subDeviceId at the last byte of the struct.
uint8_t reserved[4];
uint8_t subDeviceId;
};

struct Comparator {
template<typename T>
bool operator()(const T& left, const T& right) const {
Expand All @@ -45,6 +63,19 @@ namespace utils {
}
};

// Ordering functor for PCI-address-like records. Compares BusNumber first,
// then DeviceNumber, then FunctionNumber; T must expose those three members
// and they must support != and <.
template<typename T>
struct ComparatorPciAddress {
  // Returns true when lhs sorts strictly before rhs.
  bool operator()(const T& lhs, const T& rhs) const {
    return (lhs.BusNumber != rhs.BusNumber)
        ? (lhs.BusNumber < rhs.BusNumber)
        : (lhs.DeviceNumber != rhs.DeviceNumber)
            ? (lhs.DeviceNumber < rhs.DeviceNumber)
            : (lhs.FunctionNumber < rhs.FunctionNumber);
  }
};

#if defined(__gnu_linux__)

inline uint64_t GetTime(clockid_t id) {
Expand Down Expand Up @@ -167,7 +198,7 @@ namespace utils {
return GetCurrentThreadId();
#else
#ifdef SYS_gettid
return syscall(SYS_gettid);
return (uint32_t)syscall(SYS_gettid);
#else
#error "SYS_gettid is unavailable on this system"
#endif
Expand Down
14 changes: 14 additions & 0 deletions src/apex/L0/ze_kernel_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class ZeKernelCollector {

prologue_callbacks.EventPool.pfnCreateCb = OnEnterEventPoolCreate;
epilogue_callbacks.EventPool.pfnCreateCb = OnExitEventPoolCreate;
epilogue_callbacks.Event.pfnHostSynchronizeCb = OnExitEventHostSynchronize;

prologue_callbacks.CommandList.pfnAppendLaunchKernelCb =
OnEnterCommandListAppendLaunchKernel;
Expand Down Expand Up @@ -496,6 +497,19 @@ class ZeKernelCollector {
}
}

// Epilogue callback installed for Event.pfnHostSynchronizeCb. When the traced
// call carried a non-null event handle, the collector passed in through
// global_data is asked to process that event (ProcessCall) and then to run
// ProcessCalls. The result and instance_data arguments are not used.
static void OnExitEventHostSynchronize(ze_event_host_synchronize_params_t *params,
    ze_result_t result,
    void *global_data,
    void **instance_data) {
  // Nothing to do when the call was made without an event handle.
  if (*(params->phEvent) == nullptr) {
    return;
  }

  auto* collector = reinterpret_cast<ZeKernelCollector*>(global_data);
  PTI_ASSERT(collector != nullptr);

  collector->ProcessCall(*(params->phEvent));
  collector->ProcessCalls();
}

static void CreateEvent(ze_context_handle_t context,
ze_event_pool_handle_t& event_pool,
ze_event_handle_t& event) {
Expand Down
14 changes: 12 additions & 2 deletions src/apex/L0/ze_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ namespace utils {
ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr};
ze_result_t status = zeDeviceGetProperties(device, &props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
return (1ull << props.kernelTimestampValidBits) - 1ull;
//return (1ull << props.kernelTimestampValidBits) - 1ull;
return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits<uint64_t>::max()
: ((1ull << props.kernelTimestampValidBits) - 1ull));
}

inline uint64_t GetMetricTimestampMask(ze_device_handle_t device) {
Expand All @@ -365,7 +367,15 @@ namespace utils {
ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr};
ze_result_t status = zeDeviceGetProperties(device, &props);
PTI_ASSERT(status == ZE_RESULT_SUCCESS);
return (1ull << props.kernelTimestampValidBits) - 1ull;
//return (1ull << props.kernelTimestampValidBits) - 1ull;
uint32_t devicemask = (props.deviceId & 0xFF00);
if ((devicemask == 0x5600) || (devicemask == 0x4F00) || (devicemask == 0x0B00)) {
return (1ull << (props.kernelTimestampValidBits - 1)) - 1ull;
}
else {
return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits<uint64_t>::max()
: ((1ull << props.kernelTimestampValidBits) - 1ull));
}
#endif
}

Expand Down
29 changes: 21 additions & 8 deletions src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,11 +247,8 @@ void apex::_initialize()
tmp << " (Debug)";
#endif
tmp << "\nC++ Language Standard version : " << __cplusplus;
#if defined(__clang__)
/* Clang/LLVM. ---------------------------------------------- */
tmp << "\nClang Compiler version : " << __VERSION__;
#elif defined(__ICC) || defined(__INTEL_COMPILER)
/* Intel ICC/ICPC. ------------------------------------------ */
#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(__INTEL_CLANG_COMPILER) || defined(__INTEL_LLVM_COMPILER)
/* Intel ICC/ICPC/ICX/ICPX. --------------------------------- */
tmp << "\nIntel Compiler version : " << __VERSION__;
#elif defined(__GNUC__) || defined(__GNUG__)
/* GNU GCC/G++. --------------------------------------------- */
Expand All @@ -271,6 +268,9 @@ void apex::_initialize()
#elif defined(__SUNPRO_CC)
/* Oracle Solaris Studio. ----------------------------------- */
tmp << "\nOracle Compiler version : " << __SUNPRO_CC;
#elif defined(__clang__)
/* Clang/LLVM. ---------------------------------------------- */
tmp << "\nClang Compiler version : " << __VERSION__;
#endif
tmp << "\nConfigured features: Pthread";
#if defined(APEX_WITH_ACTIVEHARMONY) || defined(APEX_HAVE_ACTIVEHARMONY)
Expand Down Expand Up @@ -605,9 +605,22 @@ uint64_t init(const char * thread_name, uint64_t comm_rank,
unsetenv("LD_PRELOAD");
}
if (comm_rank == 0) {
printf("%s", apex_banner);
printf("APEX Version: %s\n", instance->version_string.c_str());
printf("Executing command line: %s\n", getCommandLine().c_str());
//printf("%s", apex_banner);
//printf("APEX Version: %s\n", instance->version_string.c_str());
//printf("Executing command line: %s\n", getCommandLine().c_str());
std::stringstream ss;
//ss << apex_banner << "\n";
ss << " ___ ______ _______ __\n";
ss << " / _ \\ | ___ \\ ___\\ \\ / /\n";
ss << "/ /_\\ \\| |_/ / |__ \\ V /\n";
ss << "| _ || __/| __| / \\\n";
ss << "| | | || | | |___/ /^\\ \\\n";
ss << "\\_| |_/\\_| \\____/\\/ \\/\n";
ss << "APEX Version: " << instance->version_string << "\n";
ss << "Executing command line: " << getCommandLine() << "\n" << std::endl;
std::string tmp{ss.str()};
fputs(tmp.c_str(), stdout);

}
FUNCTION_EXIT
return APEX_NOERROR;
Expand Down
31 changes: 22 additions & 9 deletions src/apex/apex_kokkos_tuning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,11 @@ class Variable {
double dmin;
double dmax;
double dstep;
uint64_t lmin;
uint64_t lmax;
uint64_t lstep;
uint64_t lvar;
uint64_t numValues;
int64_t lmin;
int64_t lmax;
int64_t lstep;
int64_t lvar;
int64_t numValues;
void makeSpace(void);
std::vector<Bin*> bins;
std::string getBin(double value) {
Expand Down Expand Up @@ -990,9 +990,10 @@ bool handle_start(const std::string & name, const size_t vars,
}
double result = profile->minimum;
if (result == 0.0) result = profile->accumulated/profile->calls;
result = result * 1.0e-9; // convert to seconds to help search math
if(verbose) {
std::cout << std::string(getDepth(), ' ');
std::cout << "querying time per call: " << (double)(result)/1000000000.0 << "s" << std::endl;
std::cout << "querying time per call: " << result << "s" << std::endl;
}
return result;
};
Expand Down Expand Up @@ -1064,24 +1065,36 @@ bool handle_start(const std::string & name, const size_t vars,
} else if (var->info.type == kokkos_value_int64) {
front = std::string(values[i].value.string_value);
}
//printf("Initial value: %s\n", front.c_str()); fflush(stdout);
//printf("Initial string value: %s\n", front.c_str()); fflush(stdout);
auto tmp = request->add_param_enum(
session.outputs[id]->name, front, space);
} else {
if (var->info.type == kokkos_value_double) {
// Seed the search with the caller-provided value, falling back to the lower
// bound when that value lies outside [dmin, dmax].
double tval = values[i].value.double_value;
if (tval < session.outputs[id]->dmin ||
    tval > session.outputs[id]->dmax) {
    tval = session.outputs[id]->dmin;
}
// Pass the range-checked tval (not the raw input value) as the initial
// value, matching what the int64 branch does with its own tval.
auto tmp = request->add_param_double(
    session.outputs[id]->name,
    tval,
    session.outputs[id]->dmin,
    session.outputs[id]->dmax,
    session.outputs[id]->dstep);
//printf("Initial double value: %f\n", tval); fflush(stdout);
} else if (var->info.type == kokkos_value_int64) {
int64_t tval = values[i].value.int_value;
if (tval < session.outputs[id]->lmin ||
tval > session.outputs[id]->lmax) {
tval = session.outputs[id]->lmin;
}
auto tmp = request->add_param_long(
session.outputs[id]->name,
values[i].value.int_value,
tval,
session.outputs[id]->lmin,
session.outputs[id]->lmax,
session.outputs[id]->lstep);
//printf("Initial long value: %ld\n", tval); fflush(stdout);
}
}
}
Expand Down Expand Up @@ -1302,7 +1315,7 @@ void kokkosp_end_context(const size_t contextId) {
start != session.context_starts.end()) {
if (session.verbose) {
std::cout << std::string(getDepth(), ' ');
std::cout << name->second << "\t" << (end-(start->second)) << std::endl;
std::cout << name->second << "\t" << (end-(start->second)) << " sec." << std::endl;
}
if (session.used_history.count(contextId) == 0) {
apex::sample_value(name->second, (double)(end-(start->second)));
Expand Down
Loading

0 comments on commit 9720e61

Please sign in to comment.