Skip to content

Commit

Permalink
Massive refactoring.
Browse files Browse the repository at this point in the history
Removed all per-thread timer stacks. Now, each profiler object
maintains a reference to the timer that was running when it was started,
which allows for "timer stack" behavior when dealing with "direct actions",
i.e., timed direct function calls from timed asynchronous tasks.
This now makes "untied timers" the default and only behavior for
maintaining a "timer stack", and it works fine for conventional timer
stacks, too.
  • Loading branch information
khuck committed Oct 3, 2024
1 parent 9720e61 commit ccf1ac8
Show file tree
Hide file tree
Showing 16 changed files with 140 additions and 496 deletions.
342 changes: 62 additions & 280 deletions src/apex/apex.cpp

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion src/apex/apex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,6 @@ class apex
void stop_all_policy_handles(void);
bool policy_handle_exists(apex_policy_handle* handle);
void complete_task(std::shared_ptr<task_wrapper> task_wrapper_ptr);
static void stop_internal(profiler* p);
~apex();
std::atomic<bool> finalizing;
};
Expand Down
32 changes: 11 additions & 21 deletions src/apex/apex_error_handling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,29 +172,19 @@ static void apex_custom_signal_handler_thread_exit(
[[maybe_unused]] siginfo_t * info,
[[maybe_unused]] void * context) {
APEX_ASSERT(sig == SIGUSR2);
if (apex::apex_options::untied_timers()) {
auto p = apex::thread_instance::get_current_profiler();
apex::profiler* parent = nullptr;
while(p != nullptr) {
if (p->untied_parent == nullptr || p->untied_parent->state != apex::task_wrapper::RUNNING) {
parent = nullptr;
} else {
parent = p->untied_parent->prof;
}
// only push profilers that were started on THIS thread...
if (p != nullptr && p->thread_id == apex::thread_instance::instance().get_id()) {
profilers_to_exit().push_back(p);
}
p = parent;
auto p = apex::thread_instance::instance().get_current_profiler();
apex::profiler* parent = nullptr;
while(p != nullptr) {
if (p->untied_parent == nullptr || p->untied_parent->tt_ptr->state != apex::task_wrapper::RUNNING) {
parent = nullptr;
} else {
parent = p->untied_parent;
}
} else {
// get the timer stack, in reverse order
auto& stack = apex::thread_instance::get_current_profilers();
if (stack.size() > 0) {
for (size_t i = stack.size() ; i > 0 ; i--) {
profilers_to_exit().push_back(stack[i-1]);
}
// only push profilers that were started on THIS thread...
if (p != nullptr && p->thread_id == apex::thread_instance::instance().get_id()) {
profilers_to_exit().push_back(p);
}
p = parent;
}
threads_to_exit_count--;
return;
Expand Down
2 changes: 1 addition & 1 deletion src/apex/apex_kokkos_tuning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1225,7 +1225,7 @@ void kokkosp_request_values(
if (!apex::apex_options::use_kokkos_tuning()) { return; }
// first, get the current timer node in the task tree
//auto tlt = apex::thread_instance::get_top_level_timer();
auto tlt = apex::thread_instance::get_current_profiler();
auto tlt = apex::thread_instance::instance().get_current_profiler();
std::string tree_node{"default"};
if (tlt != nullptr) {
//tree_node = tlt->tt_ptr->tree_node->getName();
Expand Down
8 changes: 7 additions & 1 deletion src/apex/apex_preload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,19 @@ int apex_preload_main(int argc, char** argv, char** envp) {
size_t needle_len{strlen(needle)};
if (len > needle_len &&
(strncmp(argv[0] + (len - needle_len), needle, needle_len)) == 0) {
fputs("zs: skipping ", stderr);
fputs("apex: skipping ", stderr);
fputs(argv[0], stderr);
fputs("!\n", stderr);
return true;
}
fputs("apex: executing ", stderr);
fputs(argv[0], stderr);
fputs("!\n", stderr);
return false;
};
if (validate_argv0("tclsh8.6")) {
return main_real(argc, argv, envp);
}
if (validate_argv0("bash")) {
return main_real(argc, argv, envp);
}
Expand Down
1 change: 0 additions & 1 deletion src/apex/apex_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ inline unsigned int sc_nprocessors_onln(void)
macro (APEX_SUSPEND, suspend, bool, false, "Suspend APEX timers and counters during the application execution") \
macro (APEX_PAPI_SUSPEND, papi_suspend, bool, false, "Suspend PAPI counters during the application execution") \
macro (APEX_PROCESS_ASYNC_STATE, process_async_state, bool, true, "Enable/disable asynchronous processing of statistics (useful when only collecting trace data)") \
macro (APEX_UNTIED_TIMERS, untied_timers, bool, true, "Disable callstack state maintenance for specific OS threads. This allows APEX timers to start on one thread and stop on another. This is not compatible with OTF2 tracing.") \
macro (APEX_TAU, use_tau, bool, false, "Enable TAU profiling (if application is executed with tau_exec).") \
macro (APEX_OTF2, use_otf2, bool, false, "Enable OTF2 trace output.") \
macro (APEX_OTF2_COLLECTIVE_SIZE, otf2_collective_size, int, 1, "") \
Expand Down
2 changes: 2 additions & 0 deletions src/apex/dependency_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ class Node : public std::enable_shared_from_this<Node> {
}
// required for using this class as a key in a map, vector, etc.
static bool compareNodeByParentName (const std::shared_ptr<Node> lhs, const std::shared_ptr<Node> rhs) {
if (lhs == nullptr) return true;
if (rhs == nullptr) return false;
if (lhs->parents[0]->index < rhs->parents[0]->index) {
return true;
}
Expand Down
6 changes: 3 additions & 3 deletions src/apex/memory_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ void recordAlloc(const size_t bytes, const void* ptr,
static book_t& book = getBook();
double value = (double)(bytes);
if (cpu) sample_value("Memory: Bytes Allocated", value, true);
profiler * p = thread_instance::instance().get_current_profiler();
auto p = thread_instance::instance().get_current_profiler();
record_t tmp(value, thread_instance::instance().get_id(), alloc, cpu);
if (p != nullptr) { tmp.id = p->get_task_id(); }
//backtrace_record_t rec(3,tmp.backtrace);
Expand Down Expand Up @@ -168,7 +168,7 @@ void recordFree(const void* ptr, const bool cpu) {
double value = (double)(bytes);
if (cpu) sample_value("Memory: Bytes Freed", value, true);
book.totalAllocated.fetch_sub(bytes, std::memory_order_relaxed);
profiler * p = thread_instance::instance().get_current_profiler();
auto p = thread_instance::instance().get_current_profiler();
if (p == nullptr) {
auto i = apex::instance();
// might be after finalization, so double-check!
Expand All @@ -186,7 +186,7 @@ void recordFree(const void* ptr, const bool cpu) {
/* This doesn't belong here, but whatevs */
void recordMetric(std::string name, double value) {
in_apex prevent_memory_tracking;
profiler * p = thread_instance::instance().get_current_profiler();
auto p = thread_instance::instance().get_current_profiler();
if (p != nullptr) {
p->metric_map[name] = value;
}
Expand Down
15 changes: 10 additions & 5 deletions src/apex/profiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ class disabled_profiler_exception : public std::exception {
}
};

class profiler {
class profiler : public std::enable_shared_from_this<profiler> {
private:
task_identifier * task_id; // for counters, timers
public:
std::shared_ptr<task_wrapper> tt_ptr; // for timers
std::shared_ptr<task_wrapper> untied_parent; // for timer stack handling with untied timers
profiler* untied_parent; // for timer stack handling with untied timers
uint64_t start_ns;
uint64_t end_ns;
#if APEX_HAVE_PAPI
Expand All @@ -62,7 +62,7 @@ class profiler {
reset_type is_reset;
bool stopped;
// needed for correct Hatchet output
uint64_t thread_id;
uint64_t thread_id; // saved at timer start
std::map<std::string, double> metric_map;
task_identifier * get_task_id(void) {
return task_id;
Expand Down Expand Up @@ -90,6 +90,7 @@ class profiler {
is_resume(resume),
is_reset(reset), stopped(false),
thread_id(task->thread_id) {
//printf("constructor! %p\n", this); fflush(stdout);
task->prof = this;
task->start_ns = start_ns;
}
Expand Down Expand Up @@ -129,6 +130,7 @@ class profiler {
is_reset(reset_type::NONE), stopped(true) { };
//copy constructor
profiler(const profiler& in) :
std::enable_shared_from_this<profiler>(in),
task_id(in.task_id),
tt_ptr(in.tt_ptr),
start_ns(in.start_ns),
Expand All @@ -146,15 +148,17 @@ class profiler {
stopped(in.stopped),
thread_id(in.thread_id)
{
//printf("COPY!\n"); fflush(stdout);
//printf("COPY! %p -> %p\n", &in, this); fflush(stdout);
#if APEX_HAVE_PAPI
for (int i = 0 ; i < 8 ; i++) {
papi_start_values[i] = in.papi_start_values[i];
papi_stop_values[i] = in.papi_stop_values[i];
}
#endif
}
~profiler(void) { /* not much to do here. */ };
~profiler(void) { /* not much to do here. */
//printf("destructor! %p\n", this); fflush(stdout);
};
// for "yield" support
void set_start(uint64_t timestamp) {
start_ns = timestamp;
Expand Down Expand Up @@ -276,6 +280,7 @@ class profiler {
return start_ns - get_global_start();
}
}
// Returns a std::shared_ptr to this profiler, obtained via shared_from_this().
// NOTE(review): shared_from_this() is only valid when this object is already
// owned by a std::shared_ptr (otherwise it throws std::bad_weak_ptr since
// C++17) -- confirm that callers only invoke Get() on such instances.
std::shared_ptr<profiler> Get() {return shared_from_this();}
};

}
Expand Down
3 changes: 2 additions & 1 deletion src/apex/profiler_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1901,7 +1901,8 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl
//std::shared_ptr<profiler> p = std::make_shared<profiler>(tt_ptr,
//is_resume);
// get the right task identifier, based on whether there are aliases
profiler * p = new profiler(tt_ptr, is_resume);
profiler* p = new profiler(tt_ptr, is_resume);
//std::shared_ptr<profiler> p = std::make_shared<profiler>(p_prime);
p->thread_id = _pls.my_tid;
APEX_ASSERT(p->thread_id == (unsigned int)thread_instance::get_id());
p->guid = tt_ptr->guid;
Expand Down
2 changes: 1 addition & 1 deletion src/apex/taskstubs_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ maptype& getMyMap(void) {
return theMap;
}

int verbosePrint(const char *format, ...)
void verbosePrint(const char *format, ...)
{
static std::mutex local_mtx;
std::scoped_lock lock{local_mtx};
Expand Down
Loading

0 comments on commit ccf1ac8

Please sign in to comment.