From 2d00c871cbd4752e6d3d16cfb1ed9a676eb78936 Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Mon, 17 Jun 2024 15:28:00 -0700 Subject: [PATCH] Adding initial support for multiple parents for Iris, PARSEc --- src/apex/apex.cpp | 138 +++++++++++++++++--------- src/apex/apex_api.hpp | 7 +- src/apex/dependency_tree.cpp | 45 ++++++--- src/apex/dependency_tree.hpp | 10 +- src/apex/otf2_listener.cpp | 18 +++- src/apex/perfetto_listener.cpp | 2 +- src/apex/profiler_listener.cpp | 4 +- src/apex/task_wrapper.hpp | 19 ++-- src/apex/taskstubs_implementation.cpp | 13 ++- src/apex/trace_event_listener.cpp | 54 ++++++---- src/apex/utils.cpp | 3 - src/scripts/apex-treesummary.py | 67 +++++-------- src/unit_tests/C++/CMakeLists.txt | 1 + 13 files changed, 227 insertions(+), 154 deletions(-) diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index 100e775f..e46bf81c 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -630,7 +630,8 @@ string& version() { inline std::shared_ptr _new_task( task_identifier * id, const uint64_t task_id, - const std::shared_ptr parent_task, apex* instance) { + const std::vector> parent_tasks, + apex* instance) { in_apex prevent_deadlocks; APEX_UNUSED(instance); std::shared_ptr tt_ptr = make_shared(); @@ -643,17 +644,15 @@ inline std::shared_ptr _new_task( !apex_options::use_otf2()) { tt_ptr->parent = task_wrapper::get_apex_main_wrapper(); // was a parent passed in? - } else */ if (parent_task != nullptr) { - tt_ptr->parent_guid = parent_task->guid; - tt_ptr->parent = parent_task; + } else */ if (parent_tasks.size() > 0) { + tt_ptr->parents = parent_tasks; // if not, is there a current timer? } else { profiler * p = thread_instance::instance().get_current_profiler(); if (p != nullptr) { - tt_ptr->parent_guid = p->guid; - tt_ptr->parent = p->tt_ptr; + tt_ptr->parents.push_back(p->tt_ptr); } else { - tt_ptr->parent = task_wrapper::get_apex_main_wrapper(); + tt_ptr->parents.push_back(task_wrapper::get_apex_main_wrapper()); } } if (apex_options::use_tasktree_output() || apex_options::use_hatchet_output()) { @@ -722,7 +721,7 @@ profiler* start(const std::string &timer_name) if (_notify_listeners) { bool success = true; task_identifier * id = task_identifier::get_task_id(timer_name); - tt_ptr = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); #if defined(APEX_DEBUG)//_disabled) if (apex_options::use_verbose()) { debug_print("Start", tt_ptr); } #endif @@ -787,7 +786,7 @@ profiler* start(const apex_function_address function_address) { if (_notify_listeners) { bool success = true; task_identifier * id = task_identifier::get_task_id(function_address); - tt_ptr = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); #if defined(APEX_DEBUG)//_disabled) if (apex_options::use_verbose()) { debug_print("Start", tt_ptr); } #endif @@ -918,7 +917,7 @@ profiler* resume(const std::string &timer_name) { std::shared_ptr tt_ptr(nullptr); if (_notify_listeners) { task_identifier * id = task_identifier::get_task_id(timer_name); - tt_ptr = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); APEX_UTIL_REF_COUNT_TASK_WRAPPER try { //read_lock_type l(instance->listener_mutex); @@ -962,7 +961,7 @@ profiler* resume(const apex_function_address function_address) { std::shared_ptr tt_ptr(nullptr); if (_notify_listeners) { task_identifier * id = task_identifier::get_task_id(function_address); - tt_ptr = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); APEX_UTIL_REF_COUNT_TASK_WRAPPER try { //read_lock_type l(instance->listener_mutex); @@ -1397,36 +1396,68 @@ void sample_value(const std::string &name, double value, bool threaded) } } +#define APEX_CHECK_DISABLE \ + /* if APEX is disabled, do nothing. */ \ + if (apex_options::disable() == true) { \ + APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER \ + return nullptr; \ + } +#define APEX_CHECK_SUSPEND \ + /* if APEX is suspended, do nothing. */ \ + if (apex_options::suspend() == true) { \ + APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER \ + return nullptr; \ + } +#define APEX_CHECK_INTERNAL \ + const std::string apex_internal("apex_internal"); \ + if (starts_with(name, apex_internal)) { \ + APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER \ + /* don't process our own events - queue scrubbing tasks. */ \ + return nullptr; \ + } +#define APEX_CHECK_INSTANCE \ + /* get the Apex static instance */ \ + apex* instance = apex::instance(); \ + /* protect against calls after finalization */ \ + if (!instance || _exited) { \ + APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER \ + return nullptr; \ + } + std::shared_ptr new_task( const std::string &name, const uint64_t task_id, const std::shared_ptr parent_task) { in_apex prevent_deadlocks; - // if APEX is disabled, do nothing. - if (apex_options::disable() == true) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; - } - // if APEX is suspended, do nothing. - if (apex_options::suspend() == true) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; - } - const std::string apex_internal("apex_internal"); - if (starts_with(name, apex_internal)) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - // don't process our own events - queue scrubbing tasks. - return nullptr; + APEX_CHECK_DISABLE + APEX_CHECK_SUSPEND + APEX_CHECK_INTERNAL + APEX_CHECK_INSTANCE + task_identifier * id = task_identifier::get_task_id(name); + std::vector> parents{}; + if (parent_task != null_task_wrapper) { + parents.push_back(parent_task); } - apex* instance = apex::instance(); // get the Apex static instance - if (!instance || _exited) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; - } // protect against calls after finalization + std::shared_ptr + tt_ptr(_new_task(id, task_id, parents, instance)); + APEX_UTIL_REF_COUNT_TASK_WRAPPER + return tt_ptr; +} + +std::shared_ptr new_task( + const std::string &name, + const uint64_t task_id, + const std::vector> parent_tasks) +{ + in_apex prevent_deadlocks; + APEX_CHECK_DISABLE + APEX_CHECK_SUSPEND + APEX_CHECK_INTERNAL + APEX_CHECK_INSTANCE task_identifier * id = task_identifier::get_task_id(name); std::shared_ptr - tt_ptr(_new_task(id, task_id, parent_task, instance)); + tt_ptr(_new_task(id, task_id, parent_tasks, instance)); APEX_UTIL_REF_COUNT_TASK_WRAPPER return tt_ptr; } @@ -1436,23 +1467,32 @@ std::shared_ptr new_task( const uint64_t task_id, const std::shared_ptr parent_task) { in_apex prevent_deadlocks; - // if APEX is disabled, do nothing. - if (apex_options::disable() == true) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; } - // if APEX is suspended, do nothing. - if (apex_options::suspend() == true) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; } - // get the Apex static instance - apex* instance = apex::instance(); - // protect against calls after finalization - if (!instance || _exited) { - APEX_UTIL_REF_COUNT_NULL_TASK_WRAPPER - return nullptr; } + APEX_CHECK_DISABLE + APEX_CHECK_SUSPEND + APEX_CHECK_INSTANCE task_identifier * id = task_identifier::get_task_id(function_address); + std::vector> parents{}; + if (parent_task != null_task_wrapper) { + parents.push_back(parent_task); + } std::shared_ptr - tt_ptr(_new_task(id, task_id, parent_task, instance)); + tt_ptr(_new_task(id, task_id, parents, instance)); + APEX_UTIL_REF_COUNT_TASK_WRAPPER + return tt_ptr; +} + +std::shared_ptr new_task( + const apex_function_address function_address, + const uint64_t task_id, + const std::vector> parent_tasks) { + in_apex prevent_deadlocks; + APEX_CHECK_DISABLE + APEX_CHECK_SUSPEND + APEX_CHECK_INSTANCE + task_identifier * id = task_identifier::get_task_id(function_address); + std::shared_ptr + tt_ptr(_new_task(id, task_id, parent_tasks, instance)); + APEX_UTIL_REF_COUNT_TASK_WRAPPER return tt_ptr; } @@ -1474,7 +1514,7 @@ std::shared_ptr update_task( // protect against calls after finalization if (!instance || _exited) { return nullptr; } task_identifier * id = task_identifier::get_task_id(timer_name); - wrapper = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + wrapper = _new_task(id, UINTMAX_MAX, {}, instance); } else { task_identifier * id = task_identifier::get_task_id(timer_name); // only have to do something if the ID has changed @@ -1518,7 +1558,7 @@ std::shared_ptr update_task( // protect against calls after finalization if (!instance || _exited) { return nullptr; } task_identifier * id = task_identifier::get_task_id(function_address); - wrapper = _new_task(id, UINTMAX_MAX, null_task_wrapper, instance); + wrapper = _new_task(id, UINTMAX_MAX, {}, instance); } else { task_identifier * id = task_identifier::get_task_id(function_address); // only have to do something if the ID has changed diff --git a/src/apex/apex_api.hpp b/src/apex/apex_api.hpp index 2783e95a..063aadeb 100644 --- a/src/apex/apex_api.hpp +++ b/src/apex/apex_api.hpp @@ -359,7 +359,7 @@ APEX_EXPORT std::shared_ptr new_task( APEX_EXPORT std::shared_ptr new_task( const std::string &name, - const uint64_t task_id = UINTMAX_MAX, + const uint64_t task_id, const std::vector> parent_tasks); /** @@ -380,6 +380,11 @@ APEX_EXPORT std::shared_ptr new_task( const uint64_t task_id = UINTMAX_MAX, const std::shared_ptr parent_task = null_task_wrapper); +APEX_EXPORT std::shared_ptr new_task( + const apex_function_address function_address, + const uint64_t task_id, + const std::vector> parent_tasks); + /** \brief Update a task (dependency). diff --git a/src/apex/dependency_tree.cpp b/src/apex/dependency_tree.cpp index 19e7cbb2..478b9855 100644 --- a/src/apex/dependency_tree.cpp +++ b/src/apex/dependency_tree.cpp @@ -23,15 +23,22 @@ std::mutex Node::treeMutex; std::atomic Node::nodeCount{0}; std::set Node::known_metrics; -Node* Node::appendChild(task_identifier* c) { +Node* Node::appendChild(task_identifier* c, Node* existing) { treeMutex.lock(); auto iter = children.find(*c); if (iter == children.end()) { - auto n = new Node(c,this); - //std::cout << "Inserting " << c->get_name() << std::endl; - children.insert(std::make_pair(*c,n)); - treeMutex.unlock(); - return n; + if (existing != nullptr) { + existing->parents.push_back(this); + children.insert(std::make_pair(*c,existing)); + treeMutex.unlock(); + return existing; + } else { + auto n = new Node(c,this); + //std::cout << "Inserting " << c->get_name() << std::endl; + children.insert(std::make_pair(*c,n)); + treeMutex.unlock(); + return n; + } } iter->second->count++; treeMutex.unlock(); @@ -68,11 +75,16 @@ Node* Node::replaceChild(task_identifier* old_child, task_identifier* new_child) } void Node::writeNode(std::ofstream& outfile, double total) { + static std::set processed; + if (processed.count(this)) return; + processed.insert(this); static size_t depth = 0; // Write out the relationships - if (parent != nullptr) { - outfile << " \"" << parent->getIndex() << "\" -> \"" << getIndex() << "\";"; - outfile << std::endl; + for(auto& parent : parents) { + if (parent != nullptr) { + outfile << " \"" << parent->getIndex() << "\" -> \"" << getIndex() << "\";"; + outfile << std::endl; + } } double acc = (data == task_identifier::get_main_task_id() || getAccumulated() == 0.0) ? @@ -339,11 +351,20 @@ void Node::addAccumulated(double value, double incl, bool is_resume, uint64_t th double Node::writeNodeCSV(std::stringstream& outfile, double total, int node_id, int num_papi_counters) { static size_t depth = 0; + static std::set processed; + if (processed.count(this)) return getAccumulated(); + processed.insert(this); APEX_ASSERT(total > 0.0); // write out the node id and graph node index and the name - outfile << node_id << "," << index << ","; - outfile << ((parent == nullptr) ? 0 : parent->index) << ","; - outfile << depth << ",\""; + outfile << node_id << "," << index << ",\"["; + std::string delim(""); + for (auto& parent : parents) { + if (parent != nullptr) { + outfile << delim << parent->index; + delim = ","; + } + } + outfile << "]\"," << depth << ",\""; outfile << data->get_tree_name() << "\","; // write out the accumulated double acc = (data == task_identifier::get_main_task_id() || getAccumulated() == 0.0) ? diff --git a/src/apex/dependency_tree.hpp b/src/apex/dependency_tree.hpp index 4da9dc69..f9b12a82 100644 --- a/src/apex/dependency_tree.hpp +++ b/src/apex/dependency_tree.hpp @@ -48,7 +48,7 @@ class metricStorage { class Node { private: task_identifier* data; - Node* parent; + std::vector parents; size_t count; apex_profile prof; //double calls; @@ -67,8 +67,9 @@ class Node { static std::set known_metrics; public: Node(task_identifier* id, Node* p) : - data(id), parent(p), count(1), inclusive(0), + data(id), count(1), inclusive(0), index(nodeCount.fetch_add(1, std::memory_order_relaxed)) { + parents.push_back(p); prof.calls = 0.0; prof.accumulated = 0.0; prof.minimum = 0.0; @@ -83,10 +84,9 @@ class Node { } treeMutex.unlock(); } - Node* appendChild(task_identifier* c); + Node* appendChild(task_identifier* c, Node* existing); Node* replaceChild(task_identifier* old_child, task_identifier* new_child); task_identifier* getData() { return data; } - Node* getParent() { return parent; } size_t getCount() { return count; } inline double& getCalls() { return prof.calls; } inline double& getAccumulated() { return prof.accumulated; } @@ -112,7 +112,7 @@ class Node { } // required for using this class as a key in a map, vector, etc. static bool compareNodeByParentName (const Node* lhs, const Node* rhs) { - if (lhs->parent->index < rhs->parent->index) { + if (lhs->parents[0]->index < rhs->parents[0]->index) { return true; } if (lhs->getName().compare(rhs->getName()) < 0) { diff --git a/src/apex/otf2_listener.cpp b/src/apex/otf2_listener.cpp index df351d79..5a5874ce 100644 --- a/src/apex/otf2_listener.cpp +++ b/src/apex/otf2_listener.cpp @@ -1228,7 +1228,11 @@ namespace apex { OTF2_AttributeList * al = OTF2_AttributeList_New(); // create an attribute OTF2_AttributeList_AddUint64( al, 0, tt_ptr->guid ); - OTF2_AttributeList_AddUint64( al, 1, tt_ptr->parent_guid ); + std::vector pguids = {}; + for (auto& parent : tt_ptr->parents) { + pguids.push_back(parent->guid); + } + OTF2_AttributeList_AddUint64( al, pguids.size(), pguids.data() ); uint64_t idx = get_region_index(id); uint64_t stamp = 0L; if (thread_instance::get_id() == 0) { @@ -1283,7 +1287,11 @@ namespace apex { OTF2_AttributeList * al = OTF2_AttributeList_New(); // create an attribute OTF2_AttributeList_AddUint64( al, 0, p->tt_ptr->guid ); - OTF2_AttributeList_AddUint64( al, 1, p->tt_ptr->parent_guid ); + std::vector pguids = {}; + for (auto& parent : tt_ptr->parents) { + pguids.push_back(parent->guid); + } + OTF2_AttributeList_AddUint64( al, pguids.size(), pguids.data() ); // unfortunately, we can't use the timestamp from the // profiler object. bummer. it has to be taken after // the lock is acquired, so that events happen on @@ -2744,7 +2752,11 @@ namespace apex { OTF2_AttributeList * al = OTF2_AttributeList_New(); // create an attribute OTF2_AttributeList_AddUint64( al, 0, p->tt_ptr->guid ); - OTF2_AttributeList_AddUint64( al, 1, p->tt_ptr->parent_guid ); + std::vector pguids = {}; + for (auto& parent : tt_ptr->parents) { + pguids.push_back(parent->guid); + } + OTF2_AttributeList_AddUint64( al, pguids.size(), pguids.data() ); OTF2_EC(OTF2_EvtWriter_Enter( local_evt_writer, al, stamp, idx /* region */ )); stamp = p->get_stop_ns() - globalOffset; diff --git a/src/apex/perfetto_listener.cpp b/src/apex/perfetto_listener.cpp index 78182e8d..04686aa4 100644 --- a/src/apex/perfetto_listener.cpp +++ b/src/apex/perfetto_listener.cpp @@ -129,7 +129,7 @@ inline bool perfetto_listener::_common_start(std::shared_ptr &tt_p //perfetto::ProcessTrack::Current(), (uint64_t)tt_ptr->prof->get_start_ns(), _guid, tt_ptr->guid, - _pguid, tt_ptr->parent_guid); + _pguid, tt_ptr->parent_guid[0]); return true; } diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index 61398822..b5be760e 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -2053,8 +2053,8 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl // get the right task identifier, based on whether there are aliases task_identifier * id = tt_ptr->get_task_id(); // if the parent task is not null, use it (obviously) - if (tt_ptr->parent != nullptr) { - task_identifier * pid = tt_ptr->parent->get_task_id(); + for (auto& parent : tt_ptr->parents) { + task_identifier * pid = parent->get_task_id(); dependency_queue()->enqueue(new task_dependency(pid, id)); return; } diff --git a/src/apex/task_wrapper.hpp b/src/apex/task_wrapper.hpp index eb06a995..5b5db610 100644 --- a/src/apex/task_wrapper.hpp +++ b/src/apex/task_wrapper.hpp @@ -47,14 +47,10 @@ struct task_wrapper { \brief An internally generated GUID for this task. */ uint64_t guid; -/** - \brief An internally generated GUID for the parent task of this task. - */ - uint64_t parent_guid; /** \brief A managed pointer to the parent task_wrapper for this task. */ - std::shared_ptr parent; + std::vector> parents; /** \brief A node in the task tree representing this task type */ @@ -93,8 +89,7 @@ struct task_wrapper { task_id(nullptr), prof(nullptr), guid(0ull), - parent_guid(0ull), - parent(nullptr), + parents({}), tree_node(nullptr), alias(nullptr), thread_id(0UL), @@ -132,11 +127,17 @@ struct task_wrapper { } void assign_heritage() { // make/find a node for ourselves - tree_node = parent->tree_node->appendChild(task_id); + //tree_node = parents[0]->tree_node->appendChild(task_id); + for(auto& parent : parents) { + tree_node = parent->tree_node->appendChild(task_id, tree_node); + } } void update_heritage() { // make/find a node for ourselves - tree_node = parent->tree_node->replaceChild(task_id, alias); + //tree_node = parents[0]->tree_node->replaceChild(task_id, alias); + for(auto& parent : parents) { + tree_node = parent->tree_node->replaceChild(task_id, alias); + } } double get_create_us() { return double(create_ns) * 1.0e-3; diff --git a/src/apex/taskstubs_implementation.cpp b/src/apex/taskstubs_implementation.cpp index d0628c32..97a35091 100644 --- a/src/apex/taskstubs_implementation.cpp +++ b/src/apex/taskstubs_implementation.cpp @@ -88,11 +88,10 @@ extern "C" { } // if no name, use address if (timer_name == nullptr || strlen(timer_name) == 0) { - // TODO: need to handle multiple parents! - if (parent_tasks.size() > 0) { + if (parent_count > 0) { auto task = apex::new_task( (apex_function_address)function_address, - timer_guid, parent_tasks[0]); + timer_guid, parent_tasks); safeInsert(timer_guid, task); } else { auto task = apex::new_task( @@ -101,9 +100,8 @@ extern "C" { safeInsert(timer_guid, task); } } else { - // TODO: need to handle multiple parents! - if (parent_tasks.size() > 0) { - auto task = apex::new_task(timer_name, timer_guid, parent_tasks[0]); + if (parent_count > 0) { + auto task = apex::new_task(timer_name, timer_guid, parent_tasks); safeInsert(timer_guid, task); } else { auto task = apex::new_task(timer_name, timer_guid); @@ -215,7 +213,8 @@ extern "C" { } void tasktimer_command_start_impl(const char* type_name) { - auto task = apex::new_task(type_name); + std::string tmpstr{type_name}; + auto task = apex::new_task(tmpstr); timerStack(task, true); } diff --git a/src/apex/trace_event_listener.cpp b/src/apex/trace_event_listener.cpp index 214e46ff..6e9a1936 100644 --- a/src/apex/trace_event_listener.cpp +++ b/src/apex/trace_event_listener.cpp @@ -103,16 +103,33 @@ void trace_event_listener::on_exit_thread(event_data &data) { return; } +inline std::string parents_to_string(std::shared_ptr tt_ptr) { + if (tt_ptr->parents.size() == 0) { + return std::string("0"); + } + if (tt_ptr->parents.size() == 1) { + APEX_ASSERT (tt_ptr->parents[0] != nullptr); + return std::to_string(tt_ptr->parents[0]->guid); + } + std::string parents{""}; + std::string delimiter{"["}; + for (auto& parent : tt_ptr->parents) { + if (parent != nullptr) { + parents += delimiter + std::to_string(parent->guid); + delimiter = ","; + } + } + parents += "]"; + return parents; +} + inline void trace_event_listener::_common_start(std::shared_ptr &tt_ptr) { static APEX_NATIVE_TLS long unsigned int tid = get_thread_id_metadata(); if (!_terminate) { std::stringstream ss; ss.precision(3); ss << fixed; - uint64_t pguid = 0; - if (tt_ptr->parent != nullptr) { - pguid = tt_ptr->parent->guid; - } + std::string pguid = parents_to_string(tt_ptr); ss << "{\"name\":\"" << tt_ptr->get_task_id()->get_name() << "\",\"cat\":\"CPU\"" << ",\"ph\":\"B\",\"pid\":" @@ -209,23 +226,20 @@ inline void trace_event_listener::_common_stop(std::shared_ptr &p) { std::stringstream ss; ss.precision(3); ss << fixed; - uint64_t pguid = 0; - if (p->tt_ptr != nullptr && p->tt_ptr->parent != nullptr) { - pguid = p->tt_ptr->parent->guid; - } // if the parent tid is not the same, create a flow event BEFORE the single event - if (p->tt_ptr->parent != nullptr && - p->tt_ptr->parent != main_wrapper + for (auto& parent : p->tt_ptr->parents) { + if (parent != nullptr && parent != main_wrapper #ifndef APEX_HAVE_HPX // ...except for HPX - make the flow event regardless - && p->tt_ptr->parent->thread_id != _tid + && parent->thread_id != _tid #endif ) { - //std::cout << "FLOWING!" << std::endl; - uint64_t flow_id = reversed_node_id + get_flow_id(); - write_flow_event(ss, p->tt_ptr->parent->get_flow_us()+0.25, 's', "ControlFlow", flow_id, - saved_node_id, p->tt_ptr->parent->thread_id, p->tt_ptr->parent->task_id->get_name(), p->get_task_id()->get_name()); - write_flow_event(ss, p->get_start_us()-0.25, 'f', "ControlFlow", flow_id, - saved_node_id, _tid, p->tt_ptr->parent->task_id->get_name(), p->get_task_id()->get_name()); + //std::cout << "FLOWING!" << std::endl; + uint64_t flow_id = reversed_node_id + get_flow_id(); + write_flow_event(ss, parent->get_flow_us()+0.25, 's', "ControlFlow", flow_id, + saved_node_id, parent->thread_id, parent->task_id->get_name(), p->get_task_id()->get_name()); + write_flow_event(ss, p->get_start_us()-0.25, 'f', "ControlFlow", flow_id, + saved_node_id, _tid, parent->task_id->get_name(), p->get_task_id()->get_name()); + } } if (p->tt_ptr->explicit_trace_start) { ss << "{\"name\":\"" << p->get_task_id()->get_name() @@ -235,6 +249,7 @@ inline void trace_event_listener::_common_stop(std::shared_ptr &p) { << ",\"ts\":" << p->get_stop_us() << "},\n"; } else { + std::string pguid = parents_to_string(p->tt_ptr); ss << "{\"name\":\"" << p->get_task_id()->get_name() << "\",\"cat\":\"CPU\"" << ",\"ph\":\"X\",\"pid\":" @@ -364,10 +379,7 @@ void trace_event_listener::on_async_event(base_thread_node &node, ss.precision(3); ss << fixed; std::string tid{make_tid(node)}; - uint64_t pguid = 0; - if (p->tt_ptr != nullptr && p->tt_ptr->parent != nullptr) { - pguid = p->tt_ptr->parent->guid; - } + std::string pguid = parents_to_string(p->tt_ptr); ss << "{\"name\":\"" << p->get_task_id()->get_name() << "\",\"cat\":\"GPU\"" << ",\"ph\":\"X\",\"pid\":" diff --git a/src/apex/utils.cpp b/src/apex/utils.cpp index 8f580518..4f8da121 100644 --- a/src/apex/utils.cpp +++ b/src/apex/utils.cpp @@ -779,9 +779,6 @@ std::string getCommandLine(void) { #else return proc_data_reader::get_command_line(); #endif - // just in case things failed - std::string tmp{"unknown"}; - return tmp; } } // namespace apex diff --git a/src/scripts/apex-treesummary.py b/src/scripts/apex-treesummary.py index bd519294..b3f3f84c 100755 --- a/src/scripts/apex-treesummary.py +++ b/src/scripts/apex-treesummary.py @@ -66,11 +66,15 @@ def parseArgs(): return args nodeIndex = 0 +printedIndexes = set() class TreeNode: def __init__(self, name, df): global nodeIndex self.name = name - self.index = nodeIndex + if df.empty: + self.index = None #nodeIndex + else: + self.index = df['node index'].iloc[0] #nodeIndex nodeIndex = nodeIndex + 1 self.children = {} self.df = df @@ -98,7 +102,7 @@ def addChild(self, name, df): def print(self, depth, total, maxranks): tmpstr = str() acc_mean = 0.0 - if not self.df.empty: + if (not self.df.empty) and (not self.index in printedIndexes): metric = 'total time(s)' rows = str(len(self.df.index)) tmpstr = tmpstr + rows.rjust(len(str(maxranks)), ' ') @@ -126,12 +130,13 @@ def print(self, depth, total, maxranks): tmpstr = tmpstr + ', mean=' + '%.3f' % acc_mean_per_call tmpstr = tmpstr + ', threads=' + str(int(acc_threads)) tmpstr = tmpstr + '} ' + self.name + '\n' + printedIndexes.add(self.index) totals = {} strings = {} for key in self.children: - value, childstr = self.children[key].print(depth+1, total, maxranks) - totals[key] = value - strings[key] = childstr + value, childstr = self.children[key].print(depth+1, total, maxranks) + totals[key] = value + strings[key] = childstr sorted_by_value = dict(sorted(totals.items(), key=lambda x:x[1], reverse=True)) for key in sorted_by_value: tmpstr = tmpstr + strings[key] @@ -245,6 +250,7 @@ def drawDOT(df, args, name): bpc_maximum = df['bytes per call'].max() # get max if args.verbose: print('Building dot file') + graphedIndexes = set() for ind in df.index: name = df['name'][ind] node_index = df['node index'][ind] @@ -254,6 +260,8 @@ def drawDOT(df, args, name): # Remember, the root node is bogus. so skip it. if node_index != parent_index: f.write(' "' + str(parent_index) + '" -> "' + str(node_index) + '";\n') + if node_index in graphedIndexes: + continue f.write(' "' + str(node_index) + '" [shape=box; ') f.write('style=filled; ') acc = df['total time(s)'][ind] @@ -296,6 +304,7 @@ def drawDOT(df, args, name): f.write('time: ' + str(acc) + '\\l"; ') f.write('];\n') + graphedIndexes.add(node_index) f.write('}') f.close() if args.dot_show: @@ -306,36 +315,11 @@ def drawDOT(df, args, name): if args.verbose: print('done.') -def graphRank(index, df, parentNode, droplist, args): - # get the name of this node - childDF = df[df['node index'] == index].copy()#.reset_index() - name = childDF['name'].iloc[0] - # should we skip this subtree? - if name in droplist: - if args.verbose: - print('Dropping: \'', name, '\'', sep='') - return - for dropped in droplist: - p = re.compile(dropped) - if p.match(name): - if args.verbose: - print('Dropping: \'', name, '\'', sep='') - return - - #name = df.loc[df['node index'] == index, 'name'].iloc[0] - childNode = parentNode.addChild(name, childDF) - - # slice out the children from the dataframe - children = df[df['parent index'] == index] - # Iterate over the children indexes and add to our node - for child in children['node index'].unique(): - if child == index: - continue - graphRank(child, df, childNode, droplist, args) - def graphRank2(index, df, parentNode, droplist, args): # get the name of this node childDF = df[df['node index'] == index].copy()#.reset_index() + if childDF.shape[0] > 1: + childDF = childDF[childDF['parent index'] == parentNode.index]#.reset_index() name = childDF['name'].iloc[0] # should we skip this subtree? if name in droplist: @@ -360,6 +344,8 @@ def graphRank2(index, df, parentNode, droplist, args): continue graphRank2(child, df, childNode, droplist, args) +import ast + def main(): args = parseArgs() if (args.tau): @@ -370,6 +356,14 @@ def main(): print('Reading tasktree...') df = pd.read_csv(args.filename) #, index_col=[0,1]) df = df.fillna(0) + print(df) + # Convert the string representation of the list of parents to a list + df.loc[:, "parent index"] = df["parent index"].apply(ast.literal_eval) + df = df.explode('parent index') + df = df.fillna(-1) + print(df) + print(df) + if args.verbose: print('Read', len(df.index), 'rows') @@ -421,15 +415,6 @@ def main(): # FIRST, build a master graph with all nodes from all ranks. print('building common tree...') root = TreeNode('apex tree base', pd.DataFrame()) - """ - for x in range(maxrank+1): - print('Rank', x, '...', end=endchar, flush=True) - # slice out this rank's data - rank = df[df['process rank'] == x] - # build a tree of this rank's data - graphRank(0, rank, root, droplist, args) - print() # write a newline - """ #unique = df.drop_duplicates(subset=["node index", "parent index", "name"], keep='first') graphRank2(0, df, root, droplist, args) diff --git a/src/unit_tests/C++/CMakeLists.txt b/src/unit_tests/C++/CMakeLists.txt index 6b9e7e14..44e816f4 100644 --- a/src/unit_tests/C++/CMakeLists.txt +++ b/src/unit_tests/C++/CMakeLists.txt @@ -47,6 +47,7 @@ set(example_programs apex_swap_threads apex_malloc apex_std_thread + apex_multiple_parents ${APEX_OPENMP_TEST} ) #apex_set_thread_cap