diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index 04c53811..725fcc85 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -463,11 +463,6 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, in_apex prevent_deadlocks; // if APEX is disabled, do nothing. if (apex_options::disable() == true) { FUNCTION_EXIT; return APEX_ERROR; } - // if we are configured with HPX, disable untied timers so we can handle - // direct actions correctly. -#ifdef APEX_HAVE_HPX - apex_options::untied_timers(false); -#endif // FIRST! make sure APEX thinks this is a worker thread (the main thread // is always a worker thread) thread_instance::instance(true); @@ -609,18 +604,11 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, //printf("APEX Version: %s\n", instance->version_string.c_str()); //printf("Executing command line: %s\n", getCommandLine().c_str()); std::stringstream ss; - //ss << apex_banner << "\n"; - ss << " ___ ______ _______ __\n"; - ss << " / _ \\ | ___ \\ ___\\ \\ / /\n"; - ss << "/ /_\\ \\| |_/ / |__ \\ V /\n"; - ss << "| _ || __/| __| / \\\n"; - ss << "| | | || | | |___/ /^\\ \\\n"; - ss << "\\_| |_/\\_| \\____/\\/ \\/\n"; + ss << apex_banner << "\n"; ss << "APEX Version: " << instance->version_string << "\n"; ss << "Executing command line: " << getCommandLine() << "\n" << std::endl; std::string tmp{ss.str()}; fputs(tmp.c_str(), stdout); - } FUNCTION_EXIT return APEX_NOERROR; @@ -639,7 +627,7 @@ class GUIDset : public std::set { public: ~GUIDset() { for (auto& g : *this) { - printf("Orphaned timer: %lu\n", g); + std::cout << "Orphaned timer: " << std::hex << g << std::endl; } } }; @@ -657,7 +645,7 @@ void debug_print(const char * event, std::shared_ptr tt_ptr) { //APEX_ASSERT(false); return; } else { - ss << thread_instance::get_id() << " " << event << " : " << + ss << thread_instance::get_id() << " APEX: " << event << " : " << std::hex << tt_ptr->guid << " : " << tt_ptr->get_task_id()->get_name() << " - parents: "; for (auto& p : tt_ptr->parents) { ss << p->get_task_id()->get_name() << ", "; @@ -688,6 +676,7 @@ inline std::shared_ptr _new_task( const uint64_t task_id, const std::vector> parent_tasks, apex* instance) { + //printf("%s Current profiler: %s\n", __func__, thread_instance::instance().get_current_profiler() == nullptr ? "nullptr" : thread_instance::instance().get_current_profiler()->tt_ptr->task_id->get_name().c_str()); in_apex prevent_deadlocks; APEX_UNUSED(instance); std::shared_ptr tt_ptr = make_shared(); @@ -704,7 +693,7 @@ inline std::shared_ptr _new_task( tt_ptr->parents = parent_tasks; // if not, is there a current timer? } else { - profiler * p = thread_instance::instance().get_current_profiler(); + auto p = thread_instance::instance().get_current_profiler(); if (p != nullptr) { //printf("Extracting parent: %s\n", p->tt_ptr->task_id->get_name().c_str()); tt_ptr->parents.push_back(p->tt_ptr); @@ -771,53 +760,11 @@ profiler* start(const std::string &timer_name) } // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - std::shared_ptr tt_ptr(nullptr); - profiler * new_profiler = nullptr; - if (_notify_listeners) { - bool success = true; - task_identifier * id = task_identifier::get_task_id(timer_name); - tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); - APEX_ASSERT(tt_ptr->state == task_wrapper::CREATED); - tt_ptr->state = task_wrapper::RUNNING; - LOCAL_DEBUG_PRINT("Start", tt_ptr); - APEX_UTIL_REF_COUNT_TASK_WRAPPER - //read_lock_type l(instance->listener_mutex); - /* - std::stringstream dbg; - dbg << thread_instance::get_id() << " Start : " << id->get_name() << endl; - printf("%s\n",dbg.str().c_str()); - fflush(stdout); - */ - for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { - success = instance->listeners[i]->on_start(tt_ptr); - tt_ptr->prof = thread_instance::instance().get_current_profiler(); - if (!success && i == 0) { - //cout << thread_instance::get_id() << " *** Not success! " << - //id->get_name() << endl; fflush(stdout); - APEX_UTIL_REF_COUNT_FAILED_START - return profiler::get_disabled_profiler(); - } - } - // If we are allowing untied timers, clear the timer stack on this thread - if (apex_options::untied_timers() == true) { - new_profiler = thread_instance::instance().get_current_profiler(); - thread_instance::instance().clear_current_profiler(); - } - } -#if defined(APEX_DEBUG) - const std::string apex_process_profile_str("apex::process_profiles"); - if (timer_name.compare(apex_process_profile_str) == 0) { - APEX_UTIL_REF_COUNT_APEX_INTERNAL_START - } else { - APEX_UTIL_REF_COUNT_START - } -#endif -/* - if (apex_options::untied_timers() == true) { - return new_profiler; - } - */ - return thread_instance::instance().restore_children_profilers(tt_ptr); + task_identifier * id = task_identifier::get_task_id(timer_name); + auto tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); + APEX_ASSERT(tt_ptr->state == task_wrapper::CREATED); + start(tt_ptr); + return thread_instance::instance().get_current_profiler(); } profiler* start(const apex_function_address function_address) { @@ -840,46 +787,10 @@ profiler* start(const apex_function_address function_address) { } // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - std::shared_ptr tt_ptr(nullptr); - profiler * new_profiler = nullptr; - if (_notify_listeners) { - bool success = true; - task_identifier * id = task_identifier::get_task_id(function_address); - tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); - APEX_ASSERT(tt_ptr->state == task_wrapper::CREATED); - tt_ptr->state = task_wrapper::RUNNING; - LOCAL_DEBUG_PRINT("Start", tt_ptr); - APEX_UTIL_REF_COUNT_TASK_WRAPPER - /* - std::stringstream dbg; - dbg << thread_instance::get_id() << " Start : " << id->get_name() << endl; - printf("%s\n",dbg.str().c_str()); - fflush(stdout); - */ - //read_lock_type l(instance->listener_mutex); - for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { - success = instance->listeners[i]->on_start(tt_ptr); - tt_ptr->prof = thread_instance::instance().get_current_profiler(); - if (!success && i == 0) { - //cout << thread_instance::get_id() << " *** Not success! " << - //id->get_name() << endl; fflush(stdout); - APEX_UTIL_REF_COUNT_FAILED_START - return profiler::get_disabled_profiler(); - } - } - // If we are allowing untied timers, clear the timer stack on this thread - if (apex_options::untied_timers() == true) { - new_profiler = thread_instance::instance().get_current_profiler(); - thread_instance::instance().clear_current_profiler(); - } - } - APEX_UTIL_REF_COUNT_START - /* - if (apex_options::untied_timers() == true) { - return new_profiler; - } - */ - return thread_instance::instance().restore_children_profilers(tt_ptr); + task_identifier * id = task_identifier::get_task_id(function_address); + auto tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); + start(tt_ptr); + return thread_instance::instance().get_current_profiler(); } void start(std::shared_ptr tt_ptr) { @@ -920,34 +831,18 @@ void start(std::shared_ptr tt_ptr) { printf("Task %s created by %lu started by %lu\n", tt_ptr->task_id->get_name().c_str(), tt_ptr->thread_id, thread_instance::instance().get_id()); } - //tt_ptr->thread_id = thread_instance::instance().get_id(); APEX_ASSERT(tt_ptr->state == task_wrapper::CREATED || tt_ptr->state == task_wrapper::YIELDED); tt_ptr->state = task_wrapper::RUNNING; if (_notify_listeners) { bool success = true; - /* - std::stringstream dbg; - dbg << thread_instance::get_id() << " Start : " << tt_ptr->task_id->get_name() << endl; - printf("%s\n",dbg.str().c_str()); - fflush(stdout); - */ - //read_lock_type l(instance->listener_mutex); for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { success = instance->listeners[i]->on_start(tt_ptr); - tt_ptr->prof = thread_instance::instance().get_current_profiler(); if (!success && i == 0) { - //cout << thread_instance::get_id() << " *** Not success! " << - //id->get_name() << endl; fflush(stdout); APEX_UTIL_REF_COUNT_FAILED_START tt_ptr->prof = profiler::get_disabled_profiler(); return; } } - //tt_ptr->prof->thread_id = thread_instance::instance().get_id(); - // If we are allowing untied timers, clear the timer stack on this thread - if (apex_options::untied_timers() == true) { - thread_instance::instance().clear_current_profiler(); - } } APEX_UTIL_REF_COUNT_START thread_instance::instance().restore_children_profilers(tt_ptr); @@ -975,33 +870,29 @@ void resume(std::shared_ptr tt_ptr) { } // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - //APEX_ASSERT(tt_ptr->state == task_wrapper::YIELDED); + APEX_ASSERT(tt_ptr->state == task_wrapper::YIELDED || + tt_ptr->state == task_wrapper::CREATED); tt_ptr->state = task_wrapper::RUNNING; if (_notify_listeners) { APEX_UTIL_REF_COUNT_TASK_WRAPPER + bool success = true; try { - //read_lock_type l(instance->listener_mutex); for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { - instance->listeners[i]->on_resume(tt_ptr); + success = instance->listeners[i]->on_resume(tt_ptr); + if (!success && i == 0) { + APEX_UTIL_REF_COUNT_FAILED_RESUME + tt_ptr->prof = profiler::get_disabled_profiler(); + return; + } } } catch (disabled_profiler_exception &e) { APEX_UTIL_REF_COUNT_FAILED_RESUME + tt_ptr->prof = profiler::get_disabled_profiler(); return; } - // If we are allowing untied timers, clear the timer stack on this thread - if (apex_options::untied_timers() == true) { - thread_instance::instance().clear_current_profiler(); - } } -#if defined(APEX_DEBUG) - const std::string apex_process_profile_str("apex::process_profiles"); - if (tt_ptr->get_task_id()->get_name(false).compare(apex_process_profile_str) - == 0) { - APEX_UTIL_REF_COUNT_APEX_INTERNAL_RESUME - } else { - APEX_UTIL_REF_COUNT_RESUME - } -#endif + APEX_UTIL_REF_COUNT_RESUME + thread_instance::instance().restore_children_profilers(tt_ptr); return; } @@ -1035,7 +926,7 @@ profiler* resume(const std::string &timer_name) { task_identifier * id = task_identifier::get_task_id(timer_name); std::shared_ptr tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); resume(tt_ptr); - return thread_instance::instance().restore_children_profilers(tt_ptr); + return thread_instance::instance().get_current_profiler(); } profiler* resume(const apex_function_address function_address) { @@ -1059,7 +950,7 @@ profiler* resume(const apex_function_address function_address) { task_identifier * id = task_identifier::get_task_id(function_address); std::shared_ptr tt_ptr = _new_task(id, UINTMAX_MAX, {}, instance); resume(tt_ptr); - return thread_instance::instance().restore_children_profilers(tt_ptr); + return thread_instance::instance().get_current_profiler(); } profiler* resume(profiler * p) { @@ -1088,22 +979,8 @@ profiler* resume(profiler * p) { } // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - p->restart(); - APEX_ASSERT(p->tt_ptr->state == task_wrapper::STOPPED); - p->tt_ptr->state = task_wrapper::RUNNING; - if (_notify_listeners) { - try { - // skip the profiler_listener - we are restoring a child timer - // for a parent that was yielded. - for (unsigned int i = 1 ; i < instance->listeners.size() ; i++) { - instance->listeners[i]->on_resume(p->tt_ptr); - } - } catch (disabled_profiler_exception &e) { - APEX_UTIL_REF_COUNT_FAILED_RESUME - return profiler::get_disabled_profiler(); - } - } - return p; + resume(p->tt_ptr); + return thread_instance::instance().get_current_profiler(); } void reset(const std::string &timer_name) { @@ -1156,58 +1033,6 @@ void apex::complete_task(std::shared_ptr task_wrapper_ptr) { } } -void apex::stop_internal(profiler* the_profiler) { - in_apex prevent_deadlocks; - // if APEX is disabled, do nothing. - if (apex_options::disable() == true) { - APEX_UTIL_REF_COUNT_DISABLED_STOP - return; - } - if (the_profiler == profiler::get_disabled_profiler()) { - APEX_UTIL_REF_COUNT_DISABLED_STOP - return; // profiler was throttled. - } - if (the_profiler == nullptr) { - APEX_UTIL_REF_COUNT_NULL_STOP - LOCAL_DEBUG_PRINT("Stop profiler", nullptr); - return; - } - if (the_profiler->stopped) { - LOCAL_DEBUG_PRINT("Double Stop internal", the_profiler->tt_ptr); - APEX_UTIL_REF_COUNT_DOUBLE_STOP - return; - } - LOCAL_DEBUG_PRINT("Stop", the_profiler->tt_ptr); - apex* instance = apex::instance(); // get the Apex static instance - // protect against calls after finalization - if (!instance || _exited || _measurement_stopped) { - APEX_UTIL_REF_COUNT_STOP_AFTER_FINALIZE - return; - } - // make sure APEX knows about this worker thread! - [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - std::shared_ptr p{the_profiler}; - APEX_ASSERT(p->tt_ptr->state == task_wrapper::RUNNING); - p->tt_ptr->state = task_wrapper::STOPPED; - if (_notify_listeners) { - //read_lock_type l(instance->listener_mutex); - for (unsigned int i = 0 ; i < instance->listeners.size() ; i++) { - instance->listeners[i]->on_stop(p); - } - } -#if defined(APEX_DEBUG) - const std::string apex_process_profile_str("apex::process_profiles"); - if (p->tt_ptr->get_task_id()->get_name(false).compare(apex_process_profile_str) - == 0) { - APEX_UTIL_REF_COUNT_APEX_INTERNAL_STOP - } else { - APEX_UTIL_REF_COUNT_STOP - } -#endif - instance->complete_task(p->tt_ptr); - p->tt_ptr = nullptr; -} - void stop(profiler* the_profiler, bool cleanup) { in_apex prevent_deadlocks; // protect against calls after finalization @@ -1235,14 +1060,8 @@ void stop(profiler* the_profiler, bool cleanup) { return; } LOCAL_DEBUG_PRINT("Stop", the_profiler->tt_ptr); - if (apex_options::untied_timers() == true) { - //thread_instance::instance().clear_untied_current_profiler(); - thread_instance::instance().clear_current_profiler_untied(the_profiler, false, - null_task_wrapper); - } else { - thread_instance::instance().clear_current_profiler(the_profiler, false, - null_task_wrapper); - } + thread_instance::instance().clear_current_profiler(false, + the_profiler->tt_ptr); apex* instance = apex::instance(); // get the Apex static instance // protect against calls after finalization if (!instance || _exited || _measurement_stopped) { @@ -1251,7 +1070,8 @@ void stop(profiler* the_profiler, bool cleanup) { } // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - std::shared_ptr p{the_profiler}; + std::shared_ptr p = std::make_shared(*the_profiler); + //std::shared_ptr p = the_profiler->Get(); APEX_ASSERT(p->tt_ptr->state == task_wrapper::RUNNING); p->tt_ptr->state = task_wrapper::STOPPED; if (_notify_listeners) { @@ -1281,6 +1101,7 @@ void stop(profiler* the_profiler, bool cleanup) { //instance->active_task_wrappers.erase(p->tt_ptr); p->tt_ptr = nullptr; } + delete (the_profiler); } void stop(std::shared_ptr tt_ptr) { @@ -1314,19 +1135,12 @@ void stop(std::shared_ptr tt_ptr) { } // get the thread id that is running this task if (tt_ptr->prof->thread_id != thread_instance::instance().get_id() && - !apex_options::untied_timers()) { + apex_options::use_verbose()) { printf("Task %s started by %lu stopped by %lu\n", tt_ptr->task_id->get_name().c_str(), tt_ptr->prof->thread_id, thread_instance::instance().get_id()); //APEX_ASSERT(tt_ptr->prof->thread_id == thread_instance::instance().get_id()); } - if (apex_options::untied_timers()) { - //thread_instance::instance().clear_untied_current_profiler(); - thread_instance::instance().clear_current_profiler_untied(tt_ptr->prof, false, - null_task_wrapper); - } else { - thread_instance::instance().clear_current_profiler(tt_ptr->prof, false, - null_task_wrapper); - } + thread_instance::instance().clear_current_profiler(false, tt_ptr); // protect against calls after finalization if (!instance || _exited || _measurement_stopped) { APEX_UTIL_REF_COUNT_STOP_AFTER_FINALIZE @@ -1394,17 +1208,12 @@ void yield(profiler* the_profiler) { return; } LOCAL_DEBUG_PRINT("Yield", the_profiler->tt_ptr); - if (apex_options::untied_timers() == true) { - //thread_instance::instance().clear_untied_current_profiler(); - thread_instance::instance().clear_current_profiler_untied(the_profiler, false, - null_task_wrapper); - } else { - thread_instance::instance().clear_current_profiler(the_profiler, false, - null_task_wrapper); - } + thread_instance::instance().clear_current_profiler(false, + the_profiler->tt_ptr); // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); - std::shared_ptr p{the_profiler}; + std::shared_ptr p = std::make_shared(*the_profiler); + //std::shared_ptr p = the_profiler->Get(); APEX_ASSERT(p->tt_ptr->state == task_wrapper::RUNNING); p->tt_ptr->state = task_wrapper::YIELDED; if (_notify_listeners) { @@ -1424,6 +1233,7 @@ void yield(profiler* the_profiler) { APEX_UTIL_REF_COUNT_YIELD } #endif + delete (the_profiler); } void yield(std::shared_ptr tt_ptr) { @@ -1452,14 +1262,7 @@ void yield(std::shared_ptr tt_ptr) { APEX_UTIL_REF_COUNT_DOUBLE_YIELD return; } - if (apex_options::untied_timers() == true) { - //thread_instance::instance().clear_untied_current_profiler(); - thread_instance::instance().clear_current_profiler_untied(tt_ptr->prof, true, - tt_ptr); - } else { - thread_instance::instance().clear_current_profiler(tt_ptr->prof, true, - tt_ptr); - } + thread_instance::instance().clear_current_profiler(true, tt_ptr); // make sure APEX knows about this worker thread! [[maybe_unused]] thread_local static bool _helper = register_thread_helper(); std::shared_ptr p{tt_ptr->prof}; @@ -1937,32 +1740,24 @@ void finalize(void) // make sure it hasn't been erased! if (instance->erased_threads.find(t) == instance->erased_threads.end()) { - t->clear_all_profilers(); + //t->clear_all_profilers(); + auto top_profiler = t->get_current_profiler(); + while (top_profiler != nullptr) { + stop(top_profiler); + if (top_profiler->untied_parent == nullptr) { break; } + top_profiler = t->get_current_profiler(); + } } } } } #endif - //if (apex_options::untied_timers() == true) { - profiler * top_profiler = thread_instance::instance().get_current_profiler(); - while (top_profiler != nullptr) { - stop(top_profiler); - if (top_profiler->untied_parent == nullptr) { break; } - top_profiler = thread_instance::instance().get_current_profiler(); - } - /* - } else { - // FIRST, stop the top level timer, while the infrastructure is still - // functioning. - auto tmp = thread_instance::get_top_level_timer(); - if (tmp != nullptr) { - stop(tmp); - thread_instance::clear_top_level_timer(); - } - // Second, stop the main timer, while the infrastructure is still - // functioning. - instance->the_profiler_listener->stop_main_timer(); - } */ + auto top_profiler = thread_instance::instance().get_current_profiler(); + while (top_profiler != nullptr) { + stop(top_profiler); + if (top_profiler->untied_parent == nullptr) { break; } + top_profiler = thread_instance::instance().get_current_profiler(); + } /* Signal the other threads that have open profiles to exit */ if (apex_options::top_level_os_threads()) { //apex_signal_all_threads(); @@ -2166,26 +1961,13 @@ void exit_thread(void) instance->known_threads.erase(&ti); } } - //if (apex_options::untied_timers() == true) { - profiler * top_profiler = thread_instance::instance().get_current_profiler(); - // tell the timer cleanup that we are exiting - thread_instance::exiting(); - while (top_profiler != nullptr) { - stop(top_profiler); - top_profiler = thread_instance::instance().get_current_profiler(); - } - /* - } else { - auto tmp = thread_instance::get_top_level_timer(); - // tell the timer cleanup that we are exiting - thread_instance::exiting(); - //printf("Old thread: %p\n", &(*tmp)); - if (tmp != nullptr) { - stop(tmp); - thread_instance::clear_top_level_timer(); - } + auto top_profiler = thread_instance::instance().get_current_profiler(); + // tell the timer cleanup that we are exiting + thread_instance::exiting(); + while (top_profiler != nullptr) { + stop(top_profiler); + top_profiler = thread_instance::instance().get_current_profiler(); } - */ // ok to set this now - we need everything still running _exited = true; event_data data; diff --git a/src/apex/apex.hpp b/src/apex/apex.hpp index a93cddda..afd0e6a5 100644 --- a/src/apex/apex.hpp +++ b/src/apex/apex.hpp @@ -174,7 +174,6 @@ class apex void stop_all_policy_handles(void); bool policy_handle_exists(apex_policy_handle* handle); void complete_task(std::shared_ptr task_wrapper_ptr); - static void stop_internal(profiler* p); ~apex(); std::atomic finalizing; }; diff --git a/src/apex/apex_error_handling.cpp b/src/apex/apex_error_handling.cpp index c8e833da..be18f794 100644 --- a/src/apex/apex_error_handling.cpp +++ b/src/apex/apex_error_handling.cpp @@ -172,29 +172,19 @@ static void apex_custom_signal_handler_thread_exit( [[maybe_unused]] siginfo_t * info, [[maybe_unused]] void * context) { APEX_ASSERT(sig == SIGUSR2); - if (apex::apex_options::untied_timers()) { - auto p = apex::thread_instance::get_current_profiler(); - apex::profiler* parent = nullptr; - while(p != nullptr) { - if (p->untied_parent == nullptr || p->untied_parent->state != apex::task_wrapper::RUNNING) { - parent = nullptr; - } else { - parent = p->untied_parent->prof; - } - // only push profilers that were started on THIS thread... - if (p != nullptr && p->thread_id == apex::thread_instance::instance().get_id()) { - profilers_to_exit().push_back(p); - } - p = parent; + auto p = apex::thread_instance::instance().get_current_profiler(); + apex::profiler* parent = nullptr; + while(p != nullptr) { + if (p->untied_parent == nullptr || p->untied_parent->tt_ptr->state != apex::task_wrapper::RUNNING) { + parent = nullptr; + } else { + parent = p->untied_parent; } - } else { - // get the timer stack, in reverse order - auto& stack = apex::thread_instance::get_current_profilers(); - if (stack.size() > 0) { - for (size_t i = stack.size() ; i > 0 ; i--) { - profilers_to_exit().push_back(stack[i-1]); - } + // only push profilers that were started on THIS thread... + if (p != nullptr && p->thread_id == apex::thread_instance::instance().get_id()) { + profilers_to_exit().push_back(p); } + p = parent; } threads_to_exit_count--; return; diff --git a/src/apex/apex_kokkos_tuning.cpp b/src/apex/apex_kokkos_tuning.cpp index 5b034dfa..2258a85d 100644 --- a/src/apex/apex_kokkos_tuning.cpp +++ b/src/apex/apex_kokkos_tuning.cpp @@ -1225,7 +1225,7 @@ void kokkosp_request_values( if (!apex::apex_options::use_kokkos_tuning()) { return; } // first, get the current timer node in the task tree //auto tlt = apex::thread_instance::get_top_level_timer(); - auto tlt = apex::thread_instance::get_current_profiler(); + auto tlt = apex::thread_instance::instance().get_current_profiler(); std::string tree_node{"default"}; if (tlt != nullptr) { //tree_node = tlt->tt_ptr->tree_node->getName(); diff --git a/src/apex/apex_preload.cpp b/src/apex/apex_preload.cpp index c4f29854..e3a87f38 100644 --- a/src/apex/apex_preload.cpp +++ b/src/apex/apex_preload.cpp @@ -56,13 +56,19 @@ int apex_preload_main(int argc, char** argv, char** envp) { size_t needle_len{strlen(needle)}; if (len > needle_len && (strncmp(argv[0] + (len - needle_len), needle, needle_len)) == 0) { - fputs("zs: skipping ", stderr); + fputs("apex: skipping ", stderr); fputs(argv[0], stderr); fputs("!\n", stderr); return true; } + fputs("apex: executing ", stderr); + fputs(argv[0], stderr); + fputs("!\n", stderr); return false; }; + if (validate_argv0("tclsh8.6")) { + return main_real(argc, argv, envp); + } if (validate_argv0("bash")) { return main_real(argc, argv, envp); } diff --git a/src/apex/apex_types.h b/src/apex/apex_types.h index 7faeec6a..320bfdb4 100644 --- a/src/apex/apex_types.h +++ b/src/apex/apex_types.h @@ -270,7 +270,6 @@ inline unsigned int sc_nprocessors_onln(void) macro (APEX_SUSPEND, suspend, bool, false, "Suspend APEX timers and counters during the application execution") \ macro (APEX_PAPI_SUSPEND, papi_suspend, bool, false, "Suspend PAPI counters during the application execution") \ macro (APEX_PROCESS_ASYNC_STATE, process_async_state, bool, true, "Enable/disable asynchronous processing of statistics (useful when only collecting trace data)") \ - macro (APEX_UNTIED_TIMERS, untied_timers, bool, true, "Disable callstack state maintenance for specific OS threads. This allows APEX timers to start on one thread and stop on another. This is not compatible with OTF2 tracing.") \ macro (APEX_TAU, use_tau, bool, false, "Enable TAU profiling (if application is executed with tau_exec).") \ macro (APEX_OTF2, use_otf2, bool, false, "Enable OTF2 trace output.") \ macro (APEX_OTF2_COLLECTIVE_SIZE, otf2_collective_size, int, 1, "") \ diff --git a/src/apex/dependency_tree.hpp b/src/apex/dependency_tree.hpp index 73e979ad..050a826a 100644 --- a/src/apex/dependency_tree.hpp +++ b/src/apex/dependency_tree.hpp @@ -107,6 +107,8 @@ class Node : public std::enable_shared_from_this { } // required for using this class as a key in a map, vector, etc. static bool compareNodeByParentName (const std::shared_ptr lhs, const std::shared_ptr rhs) { + if (lhs == nullptr) return true; + if (rhs == nullptr) return false; if (lhs->parents[0]->index < rhs->parents[0]->index) { return true; } diff --git a/src/apex/memory_wrapper.cpp b/src/apex/memory_wrapper.cpp index e9b03199..0d26f184 100644 --- a/src/apex/memory_wrapper.cpp +++ b/src/apex/memory_wrapper.cpp @@ -124,7 +124,7 @@ void recordAlloc(const size_t bytes, const void* ptr, static book_t& book = getBook(); double value = (double)(bytes); if (cpu) sample_value("Memory: Bytes Allocated", value, true); - profiler * p = thread_instance::instance().get_current_profiler(); + auto p = thread_instance::instance().get_current_profiler(); record_t tmp(value, thread_instance::instance().get_id(), alloc, cpu); if (p != nullptr) { tmp.id = p->get_task_id(); } //backtrace_record_t rec(3,tmp.backtrace); @@ -168,7 +168,7 @@ void recordFree(const void* ptr, const bool cpu) { double value = (double)(bytes); if (cpu) sample_value("Memory: Bytes Freed", value, true); book.totalAllocated.fetch_sub(bytes, std::memory_order_relaxed); - profiler * p = thread_instance::instance().get_current_profiler(); + auto p = thread_instance::instance().get_current_profiler(); if (p == nullptr) { auto i = apex::instance(); // might be after finalization, so double-check! @@ -186,7 +186,7 @@ void recordFree(const void* ptr, const bool cpu) { /* This doesn't belong here, but whatevs */ void recordMetric(std::string name, double value) { in_apex prevent_memory_tracking; - profiler * p = thread_instance::instance().get_current_profiler(); + auto p = thread_instance::instance().get_current_profiler(); if (p != nullptr) { p->metric_map[name] = value; } diff --git a/src/apex/profiler.hpp b/src/apex/profiler.hpp index 4e13dbc8..3ff6a1fb 100644 --- a/src/apex/profiler.hpp +++ b/src/apex/profiler.hpp @@ -38,12 +38,12 @@ class disabled_profiler_exception : public std::exception { } }; -class profiler { +class profiler : public std::enable_shared_from_this { private: task_identifier * task_id; // for counters, timers public: std::shared_ptr tt_ptr; // for timers - std::shared_ptr untied_parent; // for timer stack handling with untied timers + profiler* untied_parent; // for timer stack handling with untied timers uint64_t start_ns; uint64_t end_ns; #if APEX_HAVE_PAPI @@ -62,7 +62,7 @@ class profiler { reset_type is_reset; bool stopped; // needed for correct Hatchet output - uint64_t thread_id; + uint64_t thread_id; // saved at timer start std::map metric_map; task_identifier * get_task_id(void) { return task_id; @@ -90,6 +90,7 @@ class profiler { is_resume(resume), is_reset(reset), stopped(false), thread_id(task->thread_id) { + //printf("constructor! %p\n", this); fflush(stdout); task->prof = this; task->start_ns = start_ns; } @@ -129,6 +130,7 @@ class profiler { is_reset(reset_type::NONE), stopped(true) { }; //copy constructor profiler(const profiler& in) : + std::enable_shared_from_this(in), task_id(in.task_id), tt_ptr(in.tt_ptr), start_ns(in.start_ns), @@ -146,7 +148,7 @@ class profiler { stopped(in.stopped), thread_id(in.thread_id) { - //printf("COPY!\n"); fflush(stdout); + //printf("COPY! %p -> %p\n", &in, this); fflush(stdout); #if APEX_HAVE_PAPI for (int i = 0 ; i < 8 ; i++) { papi_start_values[i] = in.papi_start_values[i]; @@ -154,7 +156,9 @@ class profiler { } #endif } - ~profiler(void) { /* not much to do here. */ }; + ~profiler(void) { /* not much to do here. */ + //printf("destructor! %p\n", this); fflush(stdout); + }; // for "yield" support void set_start(uint64_t timestamp) { start_ns = timestamp; @@ -276,6 +280,7 @@ class profiler { return start_ns - get_global_start(); } } + std::shared_ptr Get() {return shared_from_this();} }; } diff --git a/src/apex/profiler_listener.cpp b/src/apex/profiler_listener.cpp index 1eab0541..cd0d4c3e 100644 --- a/src/apex/profiler_listener.cpp +++ b/src/apex/profiler_listener.cpp @@ -1901,7 +1901,8 @@ if (rc != 0) cout << "PAPI error! " << name << ": " << PAPI_strerror(rc) << endl //std::shared_ptr p = std::make_shared(tt_ptr, //is_resume); // get the right task identifier, based on whether there are aliases - profiler * p = new profiler(tt_ptr, is_resume); + profiler* p = new profiler(tt_ptr, is_resume); + //std::shared_ptr p = std::make_shared(p_prime); p->thread_id = _pls.my_tid; APEX_ASSERT(p->thread_id == (unsigned int)thread_instance::get_id()); p->guid = tt_ptr->guid; diff --git a/src/apex/taskstubs_implementation.cpp b/src/apex/taskstubs_implementation.cpp index 659d9f57..62e6f635 100644 --- a/src/apex/taskstubs_implementation.cpp +++ b/src/apex/taskstubs_implementation.cpp @@ -34,7 +34,7 @@ maptype& getMyMap(void) { return theMap; } -int verbosePrint(const char *format, ...) +void verbosePrint(const char *format, ...) { static std::mutex local_mtx; std::scoped_lock lock{local_mtx}; diff --git a/src/apex/thread_instance.cpp b/src/apex/thread_instance.cpp index a81fc8ca..06a358c6 100644 --- a/src/apex/thread_instance.cpp +++ b/src/apex/thread_instance.cpp @@ -62,10 +62,6 @@ map thread_instance::_worker_map; std::mutex thread_instance::_worker_map_mutex; // Global static path to executable name string * thread_instance::_program_path = nullptr; -// Global static unordered map of parent GUIDs to child GUIDs -// to handle "overlapping timer" problem. -std::unordered_map* > - thread_instance::_children_to_resume; */ #ifdef APEX_DEBUG // Global static mutex to control access for debugging purposes @@ -245,26 +241,9 @@ string thread_instance::map_addr_to_name(apex_function_address function_address) return _function_map[function_address]; } -void thread_instance::set_current_profiler(profiler * the_profiler) { - if (apex_options::untied_timers() == true) { - APEX_ASSERT(the_profiler != nullptr && the_profiler->tt_ptr != nullptr); - // make the previous profiler on the "stack" the parent of this profiler - if (instance().untied_current_profiler == nullptr) { - //the_profiler->untied_parent = task_wrapper::get_apex_main_wrapper(); - the_profiler->untied_parent = instance().get_top_level_timer(); - } else { - the_profiler->untied_parent = instance().untied_current_profiler; - } - // make this profiler the new top of the "stack" - instance().untied_current_profiler = the_profiler->tt_ptr; - } - instance().current_profilers.push_back(the_profiler); - //printf("%lu pushing %s\n", get_id(), the_profiler->get_task_id()->get_short_name().c_str()); -} - -profiler * thread_instance::restore_children_profilers( +profiler* thread_instance::restore_children_profilers( std::shared_ptr &tt_ptr) { - profiler * parent = instance().get_current_profiler(); + profiler* parent = instance().get_current_profiler(); // if there are no children to restore, return. if (tt_ptr == nullptr || tt_ptr->data_ptr.size() == 0) {return parent;} // Get the vector of children that we stored @@ -276,7 +255,7 @@ profiler * thread_instance::restore_children_profilers( // make sure to set the current profiler - the profiler_listener // is bypassed by the resume method, above. It's the listener that // sets the current profiler when a timer is started - thread_instance::set_current_profiler((*myprof)); + //thread_instance::instance().set_current_profiler((*myprof)); } // clear the vector. myvec->clear(); @@ -284,120 +263,30 @@ profiler * thread_instance::restore_children_profilers( return parent; } -void thread_instance::clear_all_profilers() { - // nothing to do? - if (current_profilers.empty() || !_is_worker) return; - if (apex_options::untied_timers() == true) { return; } - // copy the stack - auto the_stack(current_profilers); - auto tmp = the_stack.back(); - while (the_stack.size() > 0) { - /* Make a copy of the profiler object on the top of the stack. */ - profiler * profiler_copy = new profiler(*tmp); - /* Stop the copy, using a special internal function. */ - apex::stop_internal(profiler_copy); - // pop the child from the stack copy - the_stack.pop_back(); - if (the_stack.empty()) { return; } - // get the new top of the stack - tmp = the_stack.back(); - } -} - -void thread_instance::clear_current_profiler_untied(profiler * the_profiler, +void thread_instance::clear_current_profiler( bool save_children, std::shared_ptr &tt_ptr) { static APEX_NATIVE_TLS bool fixing_stack = false; // check for recursion if (fixing_stack) {return;} // get the current profiler - auto tmp = instance().untied_current_profiler; + profiler* tmp = instance().current_profiler; + // get the task wrapper's profiler + profiler* the_profiler = tt_ptr->prof; + // This thread has no running timers, do nothing. if (tmp == nullptr) { - // nothing to do? This thread has no other running timers. + //printf("Setting current profiler to nullptr\n"); return; } - //printf("%lu popping %s\n", get_id(), tmp->get_task_id()->get_short_name().c_str()); - /* Uh-oh! Someone has caused the dreaded "overlapping timer" problem to - * happen! No problem - stop the child timer. - * Keep the children around, along with a reference to the parent's - * guid so that if/when we see this parent again, we can restart - * the children timers. */ - if (tmp->prof != the_profiler) { - fixing_stack = true; - // if the data pointer location isn't available, we can't support this runtime. - // create a vector to store the children - if (save_children == true) { - APEX_ASSERT(tt_ptr != nullptr); - } - while (tmp->prof != the_profiler) { - if (save_children == true) { - // if we are yielding, we need to stop the children - /* Make a copy of the profiler object on the top of the stack. */ - profiler * profiler_copy = new profiler(*tmp->prof); - tt_ptr->data_ptr.push_back(tmp->prof); - /* Stop the copy. The original will get reset when the - parent resumes. */ - stop(profiler_copy, false); // we better be re-entrant safe! - } else { - // since we aren't yielding, just stop the children. - stop(tmp->prof); // we better be re-entrant safe! - } - // this is a serious problem...or is it? no! - if (tmp->prof->untied_parent == nullptr) { - /* - // unless...we happen to be exiting. Bets are off. - if (apex_options::suspend() == true) { return; } - // if we've already cleared the stack on this thread, we're fine - if (instance()._exiting) { return; } - std::cerr << "Warning! empty profiler stack!" << __LINE__ << "\n"; - APEX_ASSERT(false); - //abort(); - */ - return; - } - // get the new top of the stack - tmp = tmp->prof->untied_parent; - //printf("%lu popping? %s\n", get_id(), tmp->get_task_id()->get_short_name().c_str()); - } - // done with the stack, allow proper recursion again. - fixing_stack = false; - } - instance().untied_current_profiler = tmp->prof->untied_parent; -} - -void thread_instance::clear_current_profiler(profiler * the_profiler, - bool save_children, std::shared_ptr &tt_ptr) { - // this is a stack variable that provides safety when using recursion. - static APEX_NATIVE_TLS bool fixing_stack = false; - // this is a serious problem... - if (instance().current_profilers.empty()) { - // unless...we happen to be exiting. Bets are off. - if (apex_options::suspend() == true) { return; } - if (apex_options::untied_timers() == true) { return; } - // if we've already cleared the stack on this thread, we're fine - if (instance()._exiting) { return; } - std::cerr << "APEX: Warning! empty profiler stack!!!\n"; - std::cerr << "If a profiler object was started on one OS thread "; - std::cerr << "and stopped/yielded on another, please run with the "; - std::cerr << "environment variable 'APEX_UNTIED_TIMERS=1' or "; - std::cerr << "use the C++ API call 'apex::untied_timers(true);' "; - std::cerr << "or C API call 'apex_set_untied_timers(1);'\n" << std::endl; - std::cerr << "Attempted to stop timer: " << the_profiler->get_task_id()->get_name(true) << std::endl; - APEX_ASSERT(false); - // redundant, but assert gets bypassed in a debug build. - abort(); + // if this profiler was started somewhere else, do nothing. + if (the_profiler->thread_id != instance().get_id()) { + //printf("Doing nothing with current profiler\n"); + return; } - // check for recursion - if (fixing_stack) {return;} - // get the current stack of timers - auto &the_stack = instance().current_profilers; - auto tmp = the_stack.back(); - //printf("%lu popping %s\n", get_id(), tmp->get_task_id()->get_short_name().c_str()); - /* Uh-oh! Someone has caused the dreaded "overlapping timer" problem to - * happen! No problem - stop the child timer. - * Keep the children around, along with a reference to the parent's - * guid so that if/when we see this parent again, we can restart - * the children timers. */ - if (the_stack.size() > 1 && tmp != the_profiler) { + // if the current profiler isn't this profiler, is it in the "stack"? + // we know the current profiler and the one we are stopping are + // on the same thread. Assume we are handling a "direct action" that was + // yielded. + if (tmp != the_profiler) { fixing_stack = true; // if the data pointer location isn't available, we can't support this runtime. // create a vector to store the children @@ -410,17 +299,16 @@ void thread_instance::clear_current_profiler(profiler * the_profiler, /* Make a copy of the profiler object on the top of the stack. */ profiler * profiler_copy = new profiler(*tmp); tt_ptr->data_ptr.push_back(tmp); - /* Stop the copy. The original will get reset when the + /* yield the copy. The original will get reset when the parent resumes. */ - stop(profiler_copy, false); // we better be re-entrant safe! + yield(profiler_copy); // we better be re-entrant safe! } else { // since we aren't yielding, just stop the children. stop(tmp); // we better be re-entrant safe! } - // pop the original child, we've saved it in the vector - the_stack.pop_back(); - // this is a serious problem... - if (the_stack.empty()) { + // this is a serious problem...or is it? no! + if (tmp->untied_parent == nullptr) { + /* // unless...we happen to be exiting. Bets are off. if (apex_options::suspend() == true) { return; } // if we've already cleared the stack on this thread, we're fine @@ -428,30 +316,21 @@ void thread_instance::clear_current_profiler(profiler * the_profiler, std::cerr << "Warning! empty profiler stack!" << __LINE__ << "\n"; APEX_ASSERT(false); //abort(); + */ + instance().current_profiler = nullptr; + //printf("Setting current profiler to nullptr\n"); return; } // get the new top of the stack - tmp = the_stack.back(); + tmp = tmp->untied_parent; //printf("%lu popping? %s\n", get_id(), tmp->get_task_id()->get_short_name().c_str()); } // done with the stack, allow proper recursion again. fixing_stack = false; } - // pop this timer off the stack. - the_stack.pop_back(); -} - -profiler * thread_instance::get_current_profiler(void) { - if (apex_options::untied_timers() == true) { - //APEX_ASSERT(instance().untied_current_profiler != nullptr); - //APEX_ASSERT(instance().untied_current_profiler->prof != nullptr); - if (instance().untied_current_profiler == nullptr) { - return nullptr; - } - return instance().untied_current_profiler->prof; - } - else if (instance().current_profilers.empty()) { return nullptr; } - return instance().current_profilers.back(); + //printf("%s Setting current profiler from %s to %s\n", __func__, instance().current_profiler->tt_ptr->task_id->get_name().c_str(), + //tmp->untied_parent == nullptr ? "nullptr" : tmp->untied_parent->tt_ptr->task_id->get_name().c_str()); + instance().current_profiler = tmp->untied_parent; } } diff --git a/src/apex/thread_instance.hpp b/src/apex/thread_instance.hpp index 89fbba5a..ec43eb21 100644 --- a/src/apex/thread_instance.hpp +++ b/src/apex/thread_instance.hpp @@ -52,7 +52,6 @@ class common_data { std::atomic_int _num_workers; std::atomic_int _active_threads; std::string * _program_path; - std::unordered_map* > _children_to_resume; }; class thread_instance { @@ -89,14 +88,13 @@ class thread_instance { static std::atomic_int _num_workers; static std::atomic_int _active_threads; static std::string * _program_path; - static std::unordered_map* > _children_to_resume; */ // constructor thread_instance (bool is_worker) : _id(-1), _id_reversed(UINTMAX_MAX), _runtime_id(-1), _top_level_timer_name(), _is_worker(is_worker), _task_count(0), _top_level_timer(nullptr), _exiting(false), - untied_current_profiler(nullptr) { + current_profiler(nullptr) { /* Even do this for non-workers, because for CUPTI processing we need to * generate GUIDs for the activity events! */ _id = common()._num_threads++; @@ -127,8 +125,7 @@ class thread_instance { thread_instance& operator=(thread_instance const&)= delete; // map from function address to name - unique to all threads to avoid locking std::map _function_map; - std::vector current_profilers; - std::shared_ptr untied_current_profiler; + profiler* current_profiler; uint64_t _get_guid(void) { // start at 1, because 0 means nullptr which means "no parent" _task_count++; @@ -158,23 +155,16 @@ class thread_instance { static int get_num_threads(void) { return common()._num_threads; }; static int get_num_workers(void) { return common()._num_workers; }; std::string map_addr_to_name(apex_function_address function_address); - static profiler * restore_children_profilers(std::shared_ptr &tt_ptr); - static void set_current_profiler(profiler * the_profiler); - static profiler * get_current_profiler(void); - static void clear_current_profiler(profiler * the_profiler, - bool save_children, std::shared_ptr &tt_ptr); - static void clear_current_profiler_untied(profiler * the_profiler, - bool save_children, std::shared_ptr &tt_ptr); - static void clear_current_profiler() { - instance().current_profilers.pop_back(); + static profiler* restore_children_profilers(std::shared_ptr &tt_ptr); + void set_current_profiler(profiler* the_profiler) { + //printf("%s Setting current profiler from %s to %s\n", __func__, current_profiler == nullptr ? "nullptr" : current_profiler->tt_ptr->task_id->get_name().c_str(), the_profiler->tt_ptr->task_id->get_name().c_str()); + the_profiler->untied_parent = current_profiler; + current_profiler = the_profiler; } - static std::vector& get_current_profilers(void) { return instance().current_profilers; } - static void clear_untied_current_profiler() { - auto tmp = instance().untied_current_profiler; - //APEX_ASSERT(tmp != nullptr && tmp->prof != nullptr); - if (tmp == nullptr || tmp->prof == nullptr) return; - instance().untied_current_profiler = tmp->prof->untied_parent; + profiler* get_current_profiler(void) { + return current_profiler; } + static void clear_current_profiler(bool save_children, std::shared_ptr &tt_ptr); static const char * program_path(void); static bool is_worker() { return instance()._is_worker; } static uint64_t get_guid() { return instance()._get_guid(); } diff --git a/src/apex/trace_event_listener.cpp b/src/apex/trace_event_listener.cpp index f49e3fcd..7240cd67 100644 --- a/src/apex/trace_event_listener.cpp +++ b/src/apex/trace_event_listener.cpp @@ -231,7 +231,7 @@ inline void trace_event_listener::_common_stop(std::shared_ptr &p) { for (auto& parent : p->tt_ptr->parents) { if (parent != nullptr && parent != main_wrapper #ifndef APEX_HAVE_HPX // ...except for HPX - make the flow event regardless - && (parent->thread_id != _tid || apex_options::untied_timers()) + && (parent->thread_id != _tid) #endif ) { //std::cout << "FLOWING!" << std::endl; diff --git a/src/scripts/apex_exec b/src/scripts/apex_exec index 269dd7a1..ec7fda2e 100755 --- a/src/scripts/apex_exec +++ b/src/scripts/apex_exec @@ -69,8 +69,6 @@ where APEX options are zero or more of: --apex:gpu-memory enable GPU memory wrapper support --apex:cpu-memory enable CPU memory wrapper support --apex:delay-memory delay memory wrapper support until explicitly enabled - --apex:untied enable tasks to migrate cores/OS threads - during execution (not compatible with trace output) --apex:cuda enable CUDA/CUPTI measurement (default: off) --apex:cuda-counters enable CUDA/CUPTI counter support (default: off) --apex:cuda-driver enable CUDA driver API callbacks (default: off) @@ -141,7 +139,6 @@ hip_driver=no hip_details=no level0=no monitor_gpu=no -untied=no cpuinfo=no meminfo=no ompt=no @@ -412,11 +409,6 @@ while (( "$#" )); do export APEX_PROC_SELF_IO=1 shift ;; - --apex:untied) - untied=yes - export APEX_UNTIED_TIMERS=1 - shift - ;; --apex:mpi) mpi=yes export APEX_ENABLE_MPI=1 diff --git a/src/unit_tests/C++/apex_swap_threads.cpp b/src/unit_tests/C++/apex_swap_threads.cpp index 9a90ddcd..532002e9 100644 --- a/src/unit_tests/C++/apex_swap_threads.cpp +++ b/src/unit_tests/C++/apex_swap_threads.cpp @@ -27,7 +27,6 @@ int main (int argc, char** argv) { APEX_UNUSED(argv); init("apex::swap thread unit test", 0, 1); cout << "APEX Version : " << version() << endl; - apex_options::untied_timers(true); apex_options::use_screen_output(true); apex_options::print_options(); pthread_t thread[2];