From 1f45a22d319bab3b33c58300f60f8b5ee8cefeef Mon Sep 17 00:00:00 2001 From: Kevin Huck Date: Fri, 23 Aug 2024 13:17:52 -0700 Subject: [PATCH] debugging untied task support Untied tasks now work in all situations except for direct actions that have their parents yielded while executing. That will be supported later. In the meantime, this commit includes fixed support for untied tasks, and debugged multiple parent support. Support for the taskstubs API calls add_parents and add_children has been added, and tested with tasktree and trace output. Preload support has been debugged and refactored. An apex_taskstubs_cpp test was added to test the taskstubs API implementations. --- CMakeLists.txt | 7 + src/apex/address_resolution.cpp | 4 +- src/apex/apex_preload.cpp | 26 ++- src/apex/taskstubs_implementation.cpp | 48 +++-- src/unit_tests/C++/CMakeLists.txt | 5 +- src/unit_tests/C++/apex_multiple_parents.cpp | 2 +- src/unit_tests/C++/apex_taskstubs.cpp | 212 +++++++++++++++++++ 7 files changed, 281 insertions(+), 23 deletions(-) create mode 100644 src/unit_tests/C++/apex_taskstubs.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cf823100..3d589bc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -969,6 +969,13 @@ else() "Try manually check out https://github.com/khuck/taskStubs.git to ${PROJECT_SOURCE_DIR}") endif() +if(APEX_BUILD_TESTS) + # Include path needed for example in src/unit_tests/C++ + include_directories(${PROJECT_SOURCE_DIR}/taskStubs/timer_plugin) + # Build the taskstubs library for testing purposes + add_subdirectory (taskStubs) +endif(APEX_BUILD_TESTS) + if(APEX_WITH_PLUGINS) message(INFO " apex will be built with plugin support.") set(LIBS ${LIBS} ${CMAKE_DL_LIBS}) diff --git a/src/apex/address_resolution.cpp b/src/apex/address_resolution.cpp index 795bb82b..86ee3484 100644 --- a/src/apex/address_resolution.cpp +++ b/src/apex/address_resolution.cpp @@ -124,7 +124,9 @@ namespace apex { node->info.filename = strdup(info.dli_fname); } if 
(info.dli_sname == nullptr) { - node->info.funcname = strdup("unknown"); + stringstream ss; + ss << "UNRESOLVED ADDR 0x" << hex << ip; + node->info.funcname = strdup(ss.str().c_str()); } else { node->info.funcname = strdup(info.dli_sname); } diff --git a/src/apex/apex_preload.cpp b/src/apex/apex_preload.cpp index 48f2b979..c4f29854 100644 --- a/src/apex/apex_preload.cpp +++ b/src/apex/apex_preload.cpp @@ -51,15 +51,31 @@ static int (*main_real)(int, char**, char**); int apex_preload_main(int argc, char** argv, char** envp) { // FIRST! check to see if this is a bash script. if so, DO NOTHING size_t len{strlen(argv[0])}; - if (len > 4 && strncmp(argv[0] + (len - 4), "bash", 4) == 0) { + // little lambda for making sure we wrap the right executable + const auto validate_argv0 = [&](const char * needle){ + size_t needle_len{strlen(needle)}; + if (len > needle_len && + (strncmp(argv[0] + (len - needle_len), needle, needle_len)) == 0) { + fputs("zs: skipping ", stderr); + fputs(argv[0], stderr); + fputs("!\n", stderr); + return true; + } + return false; + }; + if (validate_argv0("bash")) { + return main_real(argc, argv, envp); + } + // Next! check to see if this is a [t]csh script. if so, DO NOTHING + else if (validate_argv0("csh")) { return main_real(argc, argv, envp); } - // FIRST! check to see if this is a [t]csh script. if so, DO NOTHING - if (len > 3 && strncmp(argv[0] + (len - 3), "csh", 3) == 0) { + // Then! check to see if this is gdb. if so, DO NOTHING (should get caught by the apex_exec script though) + else if (validate_argv0("gdb")) { return main_real(argc, argv, envp); } - // FIRST! check to see if this is gdb. if so, DO NOTHING (should get caught by the apex_exec script though) - if (len > 3 && strncmp(argv[0] + (len - 3), "gdb", 3) == 0) { + // finally! 
check for a fork from cuda-gdb + else if (validate_argv0("NvDebugAgent")) { return main_real(argc, argv, envp); } // prevent re-entry diff --git a/src/apex/taskstubs_implementation.cpp b/src/apex/taskstubs_implementation.cpp index 318328cb..ec06f7bf 100644 --- a/src/apex/taskstubs_implementation.cpp +++ b/src/apex/taskstubs_implementation.cpp @@ -29,9 +29,9 @@ maptype& getMyMap(void) { } void safePrint(const char * format, tasktimer_guid_t guid) { - return; std::scoped_lock lock{mtx}; printf("%lu %s GUID %lu\n", apex::thread_instance::get_id(), format, guid); + return; } void safeInsert( @@ -93,7 +93,6 @@ extern "C" { const uint64_t parent_count) { static bool& over = apex::get_program_over(); if (over) return nullptr; - // TODO: need to handle multiple parents! // need to look up the parent shared pointers? std::vector> parent_tasks; for (uint64_t i = 0 ; i < parent_count ; i++) { @@ -103,6 +102,7 @@ extern "C" { } // if no name, use address if (timer_name == nullptr || strlen(timer_name) == 0) { + printf("Null name for timer: %p\n", function_address); if (parent_count > 0) { auto task = apex::new_task( (apex_function_address)function_address, @@ -153,7 +153,7 @@ extern "C" { // TODO: capture the execution space, somehow...a new task? MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - apex::start(apex_timer); + apex::start(apex_timer); } } void tasktimer_yield_impl( @@ -162,7 +162,7 @@ extern "C" { if (over) return; MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - apex::yield(apex_timer); + apex::yield(apex_timer); } } void tasktimer_resume_impl( @@ -172,23 +172,23 @@ extern "C" { MAP_TASK(timer, apex_timer); // TODO: why no resume function for task_wrapper objects? 
if (apex_timer != nullptr) { - apex::start(apex_timer); + apex::start(apex_timer); } } void tasktimer_stop_impl( tasktimer_timer_t timer) { MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - apex::stop(apex_timer); + apex::stop(apex_timer); } } void tasktimer_destroy_impl( tasktimer_timer_t timer) { MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - // TODO: need to handle the destroy event somehow. - // definitely need to remove it from the local map. - safeErase(apex_timer->guid); + // TODO: need to handle the destroy event somehow. + // definitely need to remove it from the local map. + safeErase(apex_timer->guid); } } void tasktimer_add_parents_impl ( @@ -197,10 +197,19 @@ extern "C" { // TODO: need to handle the add parents event MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - APEX_UNUSED(apex_timer); + for (uint64_t i = 0 ; i < parent_count ; i++) { + auto tmp = safeLookup(parents[i]); + if (tmp != nullptr) { + // add the parent to the child + apex_timer->parents.push_back(tmp); + } + } + // update the child tree + if (apex::apex_options::use_tasktree_output() || + apex::apex_options::use_hatchet_output()) { + apex_timer->assign_heritage(); + } } - APEX_UNUSED(parents); - APEX_UNUSED(parent_count); } void tasktimer_add_children_impl( tasktimer_timer_t timer, @@ -208,10 +217,19 @@ extern "C" { // TODO: need to handle the add children event MAP_TASK(timer, apex_timer); if (apex_timer != nullptr) { - APEX_UNUSED(apex_timer); + for (uint64_t i = 0 ; i < child_count ; i++) { + auto tmp = safeLookup(children[i]); + if (tmp != nullptr) { + // add the parent to the child + tmp->parents.push_back(apex_timer); + // update the child tree + if (apex::apex_options::use_tasktree_output() || + apex::apex_options::use_hatchet_output()) { + tmp->assign_heritage(); + } + } + } } - APEX_UNUSED(children); - APEX_UNUSED(child_count); } void timerStack( diff --git a/src/unit_tests/C++/CMakeLists.txt b/src/unit_tests/C++/CMakeLists.txt index 
26423c6e..80f04fae 100644 --- a/src/unit_tests/C++/CMakeLists.txt +++ b/src/unit_tests/C++/CMakeLists.txt @@ -48,6 +48,7 @@ set(example_programs apex_malloc apex_std_thread apex_multiple_parents + apex_taskstubs ${APEX_OPENMP_TEST} ) #apex_set_thread_cap @@ -83,10 +84,12 @@ foreach(example_program ${example_programs}) set(sources ${example_program}.cpp) source_group("Source Files" FILES ${sources}) add_executable("${example_program}_cpp" ${sources}) - target_link_libraries ("${example_program}_cpp" apex ${LIBS}) + target_link_libraries ("${example_program}_cpp" apex ${LIBS} timer_plugin) if (BUILD_STATIC_EXECUTABLES) set_target_properties("${example_program}_cpp" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1) endif() + # This is needed to make sure local symbols are exported and we can dladdr them + set_property(TARGET "${example_program}_cpp" PROPERTY ENABLE_EXPORTS ON) add_dependencies ("${example_program}_cpp" apex) add_dependencies (tests "${example_program}_cpp") add_test ("test_${example_program}_cpp" "${example_program}_cpp") diff --git a/src/unit_tests/C++/apex_multiple_parents.cpp b/src/unit_tests/C++/apex_multiple_parents.cpp index e9db395c..9d550d56 100644 --- a/src/unit_tests/C++/apex_multiple_parents.cpp +++ b/src/unit_tests/C++/apex_multiple_parents.cpp @@ -24,7 +24,7 @@ int child (int in, std::shared_ptr< apex::task_wrapper > this_task) { return in; } -int main(int argc, char *argv[]) { +int main([[maybe_unused]] int argc, [[maybe_unused]] char *argv[]) { int comm_rank = 0; int comm_size = 1; #ifdef APEX_ENABLE_MPI diff --git a/src/unit_tests/C++/apex_taskstubs.cpp b/src/unit_tests/C++/apex_taskstubs.cpp new file mode 100644 index 00000000..f2a35eb3 --- /dev/null +++ b/src/unit_tests/C++/apex_taskstubs.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2014-2021 Kevin Huck + * Copyright (c) 2014-2021 University of Oregon + * + * Distributed under the BSD 2-Clause Software License. 
(See accompanying + * file LICENSE) + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include "timer_plugin/tasktimer.h" + +/* ISO C doesn't allow __PRETTY_FUNCTION__, so only do it with C++ */ +#if defined(__GNUC__) && defined(__cplusplus) +#define __APEX__FUNCTION__ __PRETTY_FUNCTION__ +#else +#define __APEX__FUNCTION__ __func__ +#endif + +uint64_t _my_gettid(void) { + pid_t x = syscall(SYS_gettid); + return (uint64_t)(x); +} + +/* This simple example is truly overkill, but it tests all aspects of the API. */ + +std::atomic guid{0}; + +void A(uint64_t); +void B(uint64_t, uint64_t); +void C(uint64_t, uint64_t); +void D(void); +void E(void); +void F(void); +void xfer(void); + +void A(uint64_t parent) { + uint64_t parents[] = {parent}; + uint64_t myguid = guid++; + // both address and name + TASKTIMER_CREATE(&A, __APEX__FUNCTION__, myguid, parents, 1, tt_A); + tasktimer_argument_value_t args[1]; + args[0].type = TASKTIMER_LONG_INTEGER_TYPE; + args[0].l_value = parent; + TASKTIMER_SCHEDULE(tt_A, args, 1); + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU; + resource.device_id = 0; + resource.instance_id = _my_gettid(); + TASKTIMER_START(tt_A, &resource); + B(parent, myguid); + C(parent, myguid); + TASKTIMER_STOP(tt_A); +} + +void B(uint64_t parent1, uint64_t parent2) { + uint64_t parents[] = {parent1, parent2}; + uint64_t myguid = guid++; + // both address and name + TASKTIMER_CREATE(&B, __APEX__FUNCTION__, myguid, parents, 2, tt_B); + tasktimer_argument_value_t args[2]; + args[0].type = TASKTIMER_LONG_INTEGER_TYPE; + args[0].l_value = parent1; + args[1].type = TASKTIMER_LONG_INTEGER_TYPE; + args[1].l_value = parent2; + TASKTIMER_SCHEDULE(tt_B, args, 2); + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU; + resource.device_id = 0; + resource.instance_id = _my_gettid(); + TASKTIMER_START(tt_B, &resource); + TASKTIMER_STOP(tt_B); +} + +void C(uint64_t 
parent1, uint64_t parent2) { + uint64_t parents[] = {parent1, parent2}; + uint64_t myguid = guid++; + // no name, just address + TASKTIMER_CREATE(&C, nullptr, myguid, parents, 2, tt_C); + tasktimer_argument_value_t args[2]; + args[0].type = TASKTIMER_LONG_INTEGER_TYPE; + args[0].l_value = parent1; + args[1].type = TASKTIMER_LONG_INTEGER_TYPE; + args[1].l_value = parent2; + TASKTIMER_SCHEDULE(tt_C, args, 2); + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU; + resource.device_id = 0; + resource.instance_id = _my_gettid(); + TASKTIMER_START(tt_C, &resource); + D(); + xfer(); + E(); + xfer(); + F(); + TASKTIMER_STOP(tt_C); +} + +void D(void) { + TASKTIMER_COMMAND_START(__APEX__FUNCTION__); + TASKTIMER_COMMAND_STOP(); +} + +void E(void) { + TASKTIMER_COMMAND_START(__APEX__FUNCTION__); + TASKTIMER_COMMAND_STOP(); +} + +void F(void) { + TASKTIMER_COMMAND_START(__APEX__FUNCTION__); + TASKTIMER_COMMAND_STOP(); +} + +void xfer(void) { + constexpr uint64_t maxlen = 1024; + std::array source{1}; + std::array dest{0}; + tasktimer_execution_space_t source_info, dest_info; + tasktimer_execution_space_p sip = &source_info; + tasktimer_execution_space_p dip = &dest_info; + source_info.type = TASKTIMER_DEVICE_CPU; + source_info.device_id = 0; + source_info.instance_id = 0; + dest_info.type = TASKTIMER_DEVICE_CPU; + dest_info.device_id = 0; + dest_info.instance_id = 0; + TASKTIMER_DATA_TRANSFER_START(100, sip, "source", source.data(), dip, "dest", dest.data()); + std::copy(std::begin(source), std::end(source), std::begin(dest)); + TASKTIMER_DATA_TRANSFER_STOP(100); +} + +tasktimer_execution_space_t make_resource(void){ + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU; + resource.device_id = 0; + resource.instance_id = _my_gettid(); + return resource; +} + +void add_parent_test(uint64_t parent) { + uint64_t parents[] = {parent}; + uint64_t myguid = guid++; + // both address and name + TASKTIMER_CREATE(nullptr, 
__APEX__FUNCTION__, myguid, parents, 1, tt_add_parent_test); + TASKTIMER_SCHEDULE(tt_add_parent_test, nullptr, 0); + auto resource = make_resource(); + TASKTIMER_START(tt_add_parent_test, &resource); + // make a new timer with no parent + uint64_t newparent = guid++; + TASKTIMER_CREATE(nullptr, "added_parent", newparent, nullptr, 0, tt_newparent); + TASKTIMER_ADD_PARENTS(tt_newparent, parents, 1); + TASKTIMER_SCHEDULE(tt_newparent, nullptr, 0); + TASKTIMER_START(tt_newparent, &resource); + TASKTIMER_STOP(tt_newparent); + TASKTIMER_STOP(tt_add_parent_test); +} + +void add_child_test(tasktimer_timer_t parent) { + // create without a parent + uint64_t myguid = guid++; + TASKTIMER_CREATE(nullptr, __APEX__FUNCTION__, myguid, nullptr, 0, tt_add_child_test); + TASKTIMER_SCHEDULE(tt_add_child_test, nullptr, 0); + auto resource = make_resource(); + TASKTIMER_START(tt_add_child_test, &resource); + // make another timer with no parent + uint64_t newchild = guid++; + TASKTIMER_CREATE(nullptr, "added_child", newchild, nullptr, 0, tt_newchild); + uint64_t children[] = {myguid,newchild}; + TASKTIMER_ADD_CHILDREN(parent, children, 2); + TASKTIMER_SCHEDULE(tt_newchild, nullptr, 0); + TASKTIMER_START(tt_newchild, &resource); + TASKTIMER_STOP(tt_newchild); + TASKTIMER_STOP(tt_add_child_test); +} + +int main(int argc, char * argv[]) { + // initialize the timer plugin + TASKTIMER_INITIALIZE(); + uint64_t myguid = guid++; + // no address, just name + TASKTIMER_CREATE(nullptr, __APEX__FUNCTION__, myguid, nullptr, 0, tt); + // schedule the task + TASKTIMER_SCHEDULE(tt, nullptr, 0); + // execute the task on CPU 0, thread_id + tasktimer_execution_space_t resource; + resource.type = TASKTIMER_DEVICE_CPU; + resource.device_id = 0; + resource.instance_id = _my_gettid(); + TASKTIMER_START(tt, &resource); + // yield the task + TASKTIMER_YIELD(tt); + // run a "child" task + A(myguid); + // test the "add_parent" feature + add_parent_test(myguid); + // test the "add_child" feature + 
add_child_test(tt); + // resume the task + TASKTIMER_RESUME(tt, &resource); + // stop the task + TASKTIMER_STOP(tt); + // finalize the timer plugin + TASKTIMER_FINALIZE(); + return 0; +}