Skip to content

Commit

Permalink
Adding NVTX pass-through support.
Browse files Browse the repository at this point in the history
As requested for the pika project, this adds the ability to pass APEX
timers through to NVTX. This is not compatible with APEX CUDA support,
since that support implements the NVTX API itself. However, it should work
with an application linked with APEX if the APEX_ENABLE_NVTX_HANDOFF
environment variable is set.
  • Loading branch information
khuck committed Mar 13, 2024
1 parent 2876220 commit b148ee9
Show file tree
Hide file tree
Showing 8 changed files with 198 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/apex/CMakeLists_hpx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ set(apex_sources
gzstream.cpp
handler.cpp
memory_wrapper.cpp
nvtx_listener.cpp
policy_handler.cpp
profile_reducer.cpp
profiler_listener.cpp
Expand Down
1 change: 1 addition & 0 deletions src/apex/CMakeLists_standalone.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ event_listener.cpp
exhaustive.cpp
handler.cpp
memory_wrapper.cpp
nvtx_listener.cpp
${OTF2_SOURCE}
${perfetto_sources}
perftool_implementation.cpp
Expand Down
6 changes: 5 additions & 1 deletion src/apex/apex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include "tau_listener.hpp"
#include "profiler_listener.hpp"
#include "nvtx_listener.hpp"
#include "trace_event_listener.hpp"
#if defined(APEX_WITH_PERFETTO)
#include "perfetto_listener.hpp"
Expand Down Expand Up @@ -350,6 +351,9 @@ void apex::_initialize()
this->the_profiler_listener = new profiler_listener();
// this is always the first listener!
listeners.push_back(the_profiler_listener);
if (apex_options::use_nvtx_handoff() && !apex_options::use_cuda()) {
listeners.push_back(new nvtx_listener());
}
if (apex_options::use_tau() && tau_loaded)
{
listeners.push_back(new tau_listener());
Expand Down Expand Up @@ -1696,7 +1700,7 @@ std::string dump(bool reset, bool finalizing) {
std::cout << "Enabling memory tracking!" << std::endl;
}
controlMemoryWrapper(true);
}
}
if (_notify_listeners) {
dump_event_data data(instance->get_node_id(),
thread_instance::get_id(), reset);
Expand Down
24 changes: 24 additions & 0 deletions src/apex/apex_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,30 @@ void * get_symbol(const char * module, const char * symbol) {

}; // namespace apex::dynamic::cuda

namespace nvtx {
    // Signatures of the two NVTX entry points that are resolved at run time.
    using apex_nvtx_range_push_t = int (*)(const char *);
    using apex_nvtx_range_pop_t = int (*)(void);

    // Forward `message` to nvtxRangePushA, if that symbol could be resolved.
    void push(const char * message) {
        // Function-local static: the symbol lookup runs on the first call only.
        static const apex_nvtx_range_push_t range_push =
            reinterpret_cast<apex_nvtx_range_push_t>(
                get_symbol("nvtx", "nvtxRangePushA"));
        // The null check shouldn't be necessary, but (per the original note)
        // the assertion that would catch a failed lookup does not fire in
        // release builds.
        if (range_push == nullptr) {
            return;
        }
        range_push(message);
    }

    // Call nvtxRangePop, if that symbol could be resolved.
    void pop(void) {
        // Function-local static: the symbol lookup runs on the first call only.
        static const apex_nvtx_range_pop_t range_pop =
            reinterpret_cast<apex_nvtx_range_pop_t>(
                get_symbol("nvtx", "nvtxRangePop"));
        // Same release-build safety net as in push().
        if (range_pop == nullptr) {
            return;
        }
        range_pop();
    }
} // namespace apex::dynamic::nvtx

namespace nvml {
void apex_nvml_monitor_query(void);
void apex_nvml_monitor_stop(void);
Expand Down
5 changes: 5 additions & 0 deletions src/apex/apex_dynamic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ namespace nvml {
void stop(void);
}; // namespace apex::dynamic::rsmi

namespace nvtx {
// Forwards `message` to the dynamically resolved nvtxRangePushA symbol;
// does nothing when that symbol could not be resolved.
void push(const char* message);
// Calls the dynamically resolved nvtxRangePop symbol; does nothing when
// that symbol could not be resolved.
void pop(void);
}; // namespace apex::dynamic::nvtx

namespace roctracer {
void init(void);
void flush(void);
Expand Down
1 change: 1 addition & 0 deletions src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ inline unsigned int sc_nprocessors_onln(void)
macro (APEX_TIME_TOP_LEVEL_OS_THREADS, top_level_os_threads, bool, false, "When registering threads, measure their lifetimes.") \
macro (APEX_POLICY_DRAIN_TIMEOUT, policy_drain_timeout, int, 1000, "Internal usage only.") \
macro (APEX_ENABLE_CUDA, use_cuda, int, false, "Enable CUDA measurement with CUPTI support.") \
macro (APEX_ENABLE_NVTX_HANDOFF, use_nvtx_handoff, int, false, "Enable NVTX listener, to pass APEX timers as NVIDIA NVTX ranges.") \
macro (APEX_CUDA_COUNTERS, use_cuda_counters, int, false, "Enable CUDA CUPTI counter measurement.") \
macro (APEX_CUDA_KERNEL_DETAILS, use_cuda_kernel_details, int, false, "Enable Context information for CUDA CUPTI counter measurement and CUDA CUPTI API callback timers.") \
macro (APEX_CUDA_RUNTIME_API, use_cuda_runtime_api, bool, true, "Enable callbacks for the CUDA Runtime API (cuda*() functions).") \
Expand Down
107 changes: 107 additions & 0 deletions src/apex/nvtx_listener.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright (c) 2014-2021 Kevin Huck
* Copyright (c) 2014-2021 University of Oregon
*
* Distributed under the Boost Software License, Version 1.0. (See accompanying
* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/

#include "nvtx_listener.hpp"
#include "apex_dynamic.hpp"

using namespace std;

namespace apex {

nvtx_listener::nvtx_listener (void) : _terminate(false) {
}

// Handler for the startup event; this listener takes no action.
void nvtx_listener::on_startup(startup_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for the dump event; this listener takes no action.
void nvtx_listener::on_dump(dump_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for the shutdown event. Raises the _terminate flag so that
// start/stop events delivered afterwards are no longer forwarded to NVTX.
void nvtx_listener::on_shutdown(shutdown_event_data &data) {
    APEX_UNUSED(data);
    // _common_start() and _common_stop() test this flag before calling into
    // NVTX, but nothing else in this listener ever sets it; without this
    // assignment the guard could never take effect.
    _terminate = true;
}

// Handler for the new-node event; this listener takes no action.
void nvtx_listener::on_new_node(node_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for the new-thread event; this listener takes no action.
void nvtx_listener::on_new_thread(new_thread_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for the thread-exit event; this listener takes no action.
void nvtx_listener::on_exit_thread(event_data &data) {
    APEX_UNUSED(data);
}

// Shared implementation of on_start() and on_resume(): pushes the task's
// name as an NVTX range unless _terminate is set. Always returns true.
inline bool nvtx_listener::_common_start(std::shared_ptr<task_wrapper> &tt_ptr) {
    if (_terminate) {
        return true;
    }
    // Bind by const reference so the name stays alive for the push call
    // whether get_name() returns by value or by reference.
    const auto &range_label = tt_ptr->get_task_id()->get_name();
    dynamic::nvtx::push(range_label.c_str());
    return true;
}

// Timer start: delegates to _common_start() and returns its result.
bool nvtx_listener::on_start(std::shared_ptr<task_wrapper> &tt_ptr) {
    const bool started = _common_start(tt_ptr);
    return started;
}

// Timer resume: handled exactly like a start, via _common_start().
bool nvtx_listener::on_resume(std::shared_ptr<task_wrapper> &tt_ptr) {
    const bool resumed = _common_start(tt_ptr);
    return resumed;
}

// Shared implementation of on_stop() and on_yield(): pops one NVTX range
// unless _terminate is set. The profiler argument is not inspected.
inline void nvtx_listener::_common_stop(std::shared_ptr<profiler> &p) {
    APEX_UNUSED(p);
    if (_terminate) {
        return;
    }
    dynamic::nvtx::pop();
}

// Timer stop: delegates to _common_stop().
void nvtx_listener::on_stop(std::shared_ptr<profiler> &p) {
    _common_stop(p);
}

// Timer yield: handled exactly like a stop, via _common_stop().
void nvtx_listener::on_yield(std::shared_ptr<profiler> &p) {
    _common_stop(p);
}

// Handler for sampled counter values; this listener takes no action
// (nothing is forwarded to NVTX, whether or not _terminate is set).
void nvtx_listener::on_sample_value(sample_value_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for the periodic event; this listener takes no action.
void nvtx_listener::on_periodic(periodic_event_data &data) {
    APEX_UNUSED(data);
}

// Handler for custom events; this listener takes no action.
void nvtx_listener::on_custom_event(custom_event_data &data) {
    APEX_UNUSED(data);
}

// Node id / node count notification; both values are ignored by this listener.
void nvtx_listener::set_node_id(int node_id, int node_count) {
    APEX_UNUSED(node_count);
    APEX_UNUSED(node_id);
}

// Metadata notification; the name/value pair is ignored by this listener.
void nvtx_listener::set_metadata(const char * name, const char * value) {
    APEX_UNUSED(value);
    APEX_UNUSED(name);
}
}

54 changes: 54 additions & 0 deletions src/apex/nvtx_listener.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright (c) 2014-2021 Kevin Huck
* Copyright (c) 2014-2021 University of Oregon
*
* Distributed under the Boost Software License, Version 1.0. (See accompanying
* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/

#pragma once

#include "event_listener.hpp"
#include <memory>

namespace apex {

// Event listener that mirrors APEX timer start/resume and stop/yield events
// as NVTX range push/pop calls (see nvtx_listener.cpp). All other events are
// accepted and ignored.
class nvtx_listener : public event_listener {
public:
    nvtx_listener();
    ~nvtx_listener() {}

    // NOTE(review): declared here, but no definition is visible in
    // nvtx_listener.cpp -- confirm one exists before calling.
    static bool initialize_nvtx(int argc, char** avgv);
    // Returns the current value of the static _initialized flag.
    static bool initialized() { return _initialized; }

    // Lifecycle events (implemented as no-ops unless noted in the .cpp).
    void on_startup(startup_event_data &data);
    void on_dump(dump_event_data &data);
    void on_reset(task_identifier * id) { APEX_UNUSED(id); }
    void on_pre_shutdown() {}
    void on_shutdown(shutdown_event_data &data);
    void on_new_node(node_event_data &data);
    void on_new_thread(new_thread_event_data &data);
    void on_exit_thread(event_data &data);

    // Timer events: start/resume share _common_start(), stop/yield share
    // _common_stop().
    bool on_start(std::shared_ptr<task_wrapper> &tt_ptr);
    void on_stop(std::shared_ptr<profiler> &p);
    void on_yield(std::shared_ptr<profiler> &p);
    bool on_resume(std::shared_ptr<task_wrapper> &tt_ptr);
    void on_task_complete(std::shared_ptr<task_wrapper> &tt_ptr) {
        APEX_UNUSED(tt_ptr);
    }

    // Remaining events and setters; none of them forwards anything to NVTX.
    void on_sample_value(sample_value_event_data &data);
    void on_periodic(periodic_event_data &data);
    void on_custom_event(custom_event_data &data);
    void on_send(message_event_data &data) { APEX_UNUSED(data); }
    void on_recv(message_event_data &data) { APEX_UNUSED(data); }
    void set_node_id(int node_id, int node_count);
    void set_metadata(const char * name, const char * value);

private:
    // NOTE(review): declared here, but no definition is visible in
    // nvtx_listener.cpp -- confirm one exists before calling.
    void _init();
    // When true, start/stop events are not forwarded to NVTX.
    bool _terminate;
    bool _common_start(std::shared_ptr<task_wrapper> &tt_ptr);
    void _common_stop(std::shared_ptr<profiler> &p);
    // Flag read by initialized().
    static bool _initialized;
};

}

0 comments on commit b148ee9

Please sign in to comment.