Skip to content

Commit

Permalink
Added env var for specifying libnvToolsExt.so
Browse files Browse the repository at this point in the history
When APEX is used as an NVTX pass-through (enabled with the APEX_ENABLE_NVTX_HANDOFF
variable), NVIDIA doesn't automatically load the library that provides NVTX support.
This commit adds APEX_NVTX_LIBRARY, which defaults to "libnvToolsExt.so"
but can be overridden with a full path; alternatively, you can add the library's path to
LD_LIBRARY_PATH.
  • Loading branch information
khuck committed Apr 8, 2024
1 parent b95e0db commit c7f5357
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 28 deletions.
88 changes: 61 additions & 27 deletions src/apex/apex_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,7 @@ void * get_symbol(const char * module, const char * symbol) {
* library, Go looking for the library, and then the symbol.
* This assumes that the LD_LIBRARY_PATH will include the
* path to the library. */
std::string libname{"libapex_"};
libname += module;
#ifdef __APPLE__
libname +=".dylib";
#else
libname +=".so";
#endif
std::string libname{module};
/* Check to see if we've already loaded it */
void * handle = dlopen(libname.c_str(),
RTLD_NOLOAD | RTLD_NOW | RTLD_LOCAL);
Expand Down Expand Up @@ -54,13 +48,18 @@ void * get_symbol(const char * module, const char * symbol) {
* so we don't need to dynamically connect to a startup function.
* However, we do need to connect to a finalize function. */
namespace ompt {
#ifdef __APPLE__
const char * module ="libapex_ompt.dylib";
#else
const char * module ="libapex_ompt.so";
#endif
void apex_ompt_force_shutdown(void);
typedef void (*apex_ompt_force_shutdown_t)(void);
void do_shutdown(void) {
if (apex_options::use_ompt()) {
// do this once
static apex_ompt_force_shutdown_t apex_ompt_force_shutdown =
(apex_ompt_force_shutdown_t)get_symbol("ompt",
(apex_ompt_force_shutdown_t)get_symbol(module,
"apex_ompt_force_shutdown");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -75,6 +74,11 @@ void * get_symbol(const char * module, const char * symbol) {
/* CUDA will need a few functions. */

namespace cuda {
#ifdef __APPLE__
const char * module ="libapex_cuda.dylib";
#else
const char * module ="libapex_cuda.so";
#endif
void apex_init_cuda_tracing(void);
void apex_flush_cuda_tracing(void);
void apex_stop_cuda_tracing(void);
Expand All @@ -85,7 +89,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_cuda()) {
// do this once
static apex_init_cuda_tracing_t apex_init_cuda_tracing =
(apex_init_cuda_tracing_t)get_symbol("cuda",
(apex_init_cuda_tracing_t)get_symbol(module,
"apex_init_cuda_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -99,7 +103,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_cuda()) {
// do this once
static apex_flush_cuda_tracing_t apex_flush_cuda_tracing =
(apex_flush_cuda_tracing_t)get_symbol("cuda",
(apex_flush_cuda_tracing_t)get_symbol(module,
"apex_flush_cuda_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -113,7 +117,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_cuda()) {
// do this once
static apex_stop_cuda_tracing_t apex_stop_cuda_tracing =
(apex_stop_cuda_tracing_t)get_symbol("cuda",
(apex_stop_cuda_tracing_t)get_symbol(module,
"apex_stop_cuda_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -126,12 +130,17 @@ void * get_symbol(const char * module, const char * symbol) {
}; // namespace apex::dynamic::cuda

namespace nvtx {
#ifdef __APPLE__
const char * module =apex_options::nvtx_library();
#else
const char * module =apex_options::nvtx_library();
#endif
typedef int (*apex_nvtx_range_push_t)(const char *);
typedef int (*apex_nvtx_range_pop_t)(void);
void push(const char * message) {
// do this once
static apex_nvtx_range_push_t apex_nvtx_range_push =
(apex_nvtx_range_push_t)get_symbol("nvtx", "nvtxRangePushA");
(apex_nvtx_range_push_t)get_symbol(module, "nvtxRangePushA");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
if (apex_nvtx_range_push != nullptr) {
Expand All @@ -140,7 +149,7 @@ void * get_symbol(const char * module, const char * symbol) {
}
void pop(void) {
static apex_nvtx_range_pop_t apex_nvtx_range_pop =
(apex_nvtx_range_pop_t)get_symbol("nvtx", "nvtxRangePop");
(apex_nvtx_range_pop_t)get_symbol(module, "nvtxRangePop");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
if (apex_nvtx_range_pop != nullptr) {
Expand All @@ -150,6 +159,11 @@ void * get_symbol(const char * module, const char * symbol) {
} // namespace apex::dynamic::nvtx

namespace nvml {
#ifdef __APPLE__
const char * module ="libapex_cuda.dylib";
#else
const char * module ="libapex_cuda.so";
#endif
void apex_nvml_monitor_query(void);
void apex_nvml_monitor_stop(void);
double apex_nvml_monitor_getAvailableMemory(void);
Expand All @@ -161,7 +175,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_nvml_monitor_query_t apex_nvml_monitor_query =
(apex_nvml_monitor_query_t)get_symbol("cuda",
(apex_nvml_monitor_query_t)get_symbol(module,
"apex_nvml_monitor_query");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -176,7 +190,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_nvml_monitor_stop_t apex_nvml_monitor_stop =
(apex_nvml_monitor_stop_t)get_symbol("cuda",
(apex_nvml_monitor_stop_t)get_symbol(module,
"apex_nvml_monitor_stop");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -191,6 +205,11 @@ void * get_symbol(const char * module, const char * symbol) {
/* HIP will need several functions. */

namespace rsmi {
#ifdef __APPLE__
const char * module ="libapex_hip.dylib";
#else
const char * module ="libapex_hip.so";
#endif
void apex_rsmi_monitor_query(void);
void apex_rsmi_monitor_stop(void);
double apex_rsmi_monitor_getAvailableMemory(void);
Expand All @@ -202,7 +221,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_rsmi_monitor_query_t apex_rsmi_monitor_query =
(apex_rsmi_monitor_query_t)get_symbol("hip",
(apex_rsmi_monitor_query_t)get_symbol(module,
"apex_rsmi_monitor_query");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -217,7 +236,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_rsmi_monitor_stop_t apex_rsmi_monitor_stop =
(apex_rsmi_monitor_stop_t)get_symbol("hip",
(apex_rsmi_monitor_stop_t)get_symbol(module,
"apex_rsmi_monitor_stop");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -232,7 +251,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_rsmi_monitor_getAvailableMemory_t apex_rsmi_monitor_getAvailableMemory =
(apex_rsmi_monitor_getAvailableMemory_t)get_symbol("hip",
(apex_rsmi_monitor_getAvailableMemory_t)get_symbol(module,
"apex_rsmi_monitor_getAvailableMemory");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -248,6 +267,11 @@ void * get_symbol(const char * module, const char * symbol) {
/* Rocprof will need several functions. */

namespace rocprofiler {
#ifdef __APPLE__
const char * module ="libapex_hip.dylib";
#else
const char * module ="libapex_hip.so";
#endif
void apex_rocprofiler_monitor_query(void);
void apex_rocprofiler_monitor_stop(void);
typedef void (*apex_rocprofiler_monitor_query_t)(void);
Expand All @@ -257,7 +281,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_rocprofiler_monitor_query_t apex_rocprofiler_monitor_query =
(apex_rocprofiler_monitor_query_t)get_symbol("hip",
(apex_rocprofiler_monitor_query_t)get_symbol(module,
"apex_rocprofiler_monitor_query");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -272,7 +296,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::monitor_gpu()) {
// do this once
static apex_rocprofiler_monitor_stop_t apex_rocprofiler_monitor_stop =
(apex_rocprofiler_monitor_stop_t)get_symbol("hip",
(apex_rocprofiler_monitor_stop_t)get_symbol(module,
"apex_rocprofiler_monitor_stop");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -285,6 +309,11 @@ void * get_symbol(const char * module, const char * symbol) {
}

namespace roctracer {
#ifdef __APPLE__
const char * module ="libapex_hip.dylib";
#else
const char * module ="libapex_hip.so";
#endif
void apex_init_hip_tracing(void);
void apex_flush_hip_tracing(void);
void apex_stop_hip_tracing(void);
Expand All @@ -295,7 +324,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_hip()) {
// do this once
static apex_init_hip_tracing_t apex_init_hip_tracing =
(apex_init_hip_tracing_t)get_symbol("hip",
(apex_init_hip_tracing_t)get_symbol(module,
"apex_init_hip_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -309,7 +338,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_hip()) {
// do this once
static apex_flush_hip_tracing_t apex_flush_hip_tracing =
(apex_flush_hip_tracing_t)get_symbol("hip",
(apex_flush_hip_tracing_t)get_symbol(module,
"apex_flush_hip_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -323,7 +352,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_hip()) {
// do this once
static apex_stop_hip_tracing_t apex_stop_hip_tracing =
(apex_stop_hip_tracing_t)get_symbol("hip",
(apex_stop_hip_tracing_t)get_symbol(module,
"apex_stop_hip_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -335,7 +364,12 @@ void * get_symbol(const char * module, const char * symbol) {

}; // namespace apex::dynamic::roctracer

namespace level0 {
namespace level0 {
#ifdef __APPLE__
const char * module ="libapex_level0.dylib";
#else
const char * module ="libapex_level0.so";
#endif
void apex_init_level0_tracing(void);
void apex_flush_level0_tracing(void);
void apex_stop_level0_tracing(void);
Expand All @@ -346,7 +380,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_level0()) {
// do this once
static apex_init_level0_tracing_t apex_init_level0_tracing =
(apex_init_level0_tracing_t)get_symbol("level0",
(apex_init_level0_tracing_t)get_symbol(module,
"apex_init_level0_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -360,7 +394,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_level0()) {
// do this once
static apex_flush_level0_tracing_t apex_flush_level0_tracing =
(apex_flush_level0_tracing_t)get_symbol("level0",
(apex_flush_level0_tracing_t)get_symbol(module,
"apex_flush_level0_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand All @@ -374,7 +408,7 @@ void * get_symbol(const char * module, const char * symbol) {
if (apex_options::use_level0()) {
// do this once
static apex_stop_level0_tracing_t apex_stop_level0_tracing =
(apex_stop_level0_tracing_t)get_symbol("level0",
(apex_stop_level0_tracing_t)get_symbol(module,
"apex_stop_level0_tracing");
// shouldn't be necessary,
// but the assertion doesn't happen with release builds
Expand Down
3 changes: 2 additions & 1 deletion src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,8 @@ inline unsigned int sc_nprocessors_onln(void)
macro (APEX_EVENT_FILTER_FILE, task_event_filter_file, char*, "", "File containing names of timers to include/exclude during data collection.") \
macro (APEX_KOKKOS_TUNING_CACHE, kokkos_tuning_cache, char*, "", "Filename contining Kokkos autotuned results, tuned offline.") \
macro (APEX_KOKKOS_TUNING_POLICY, kokkos_tuning_policy, char*, "simulated_annealing", "Kokkos autotuning policy: random, exhaustive, simulated_annealing, nelder_mead.") \
macro (APEX_ROCPROF_METRICS, rocprof_metrics, char*, "", "List of metrics to periodically sample with the Rocprofiler library (see /opt/rocm/rocprofiler/lib/metrics.xml).")
macro (APEX_ROCPROF_METRICS, rocprof_metrics, char*, "", "List of metrics to periodically sample with the Rocprofiler library (see /opt/rocm/rocprofiler/lib/metrics.xml).") \
macro (APEX_NVTX_LIBRARY, nvtx_library, char*, "libnvToolsExt.so", "With NVTX listener, specify the location of libnvToolsExt.so.")
// macro (APEX_ROCPROF_METRICS, rocprof_metrics, char*, "MemUnitBusy,MemUnitStalled,VALUUtilization,VALUBusy,SALUBusy,L2CacheHit,WriteUnitStalled,ALUStalledByLDS,LDSBankConflict", "")

#if defined(_WIN32) || defined(_WIN64)
Expand Down

0 comments on commit c7f5357

Please sign in to comment.