Skip to content

Commit

Permalink
debugging untied task support
Browse files Browse the repository at this point in the history
Untied tasks now work in all situations except for direct actions
that have their parents yielded while executing. That will be supported
later. In the meantime, this commit includes fixed support for untied
tasks, and debugged multiple parent support. Support for the taskstubs
API calls add_parents and add_children has been added, and tested with
tasktree and trace output. Preload support has been debugged and
refactored. An apex_taskstubs_cpp test was added to test the taskstubs
API implementations.
  • Loading branch information
khuck committed Aug 23, 2024
1 parent 96fa417 commit 1f45a22
Show file tree
Hide file tree
Showing 7 changed files with 281 additions and 23 deletions.
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,13 @@ else()
"Try manually check out https://github.com/khuck/taskStubs.git to ${PROJECT_SOURCE_DIR}")
endif()

if(APEX_BUILD_TESTS)
# Include path needed for example in src/unit_tests/C++
include_directories(${PROJECT_SOURCE_DIR}/taskStubs/timer_plugin)
# Build the taskstubs library for testing purposes
add_subdirectory (taskStubs)
endif(APEX_BUILD_TESTS)

if(APEX_WITH_PLUGINS)
message(INFO " apex will be built with plugin support.")
set(LIBS ${LIBS} ${CMAKE_DL_LIBS})
Expand Down
4 changes: 3 additions & 1 deletion src/apex/address_resolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ namespace apex {
node->info.filename = strdup(info.dli_fname);
}
if (info.dli_sname == nullptr) {
node->info.funcname = strdup("unknown");
stringstream ss;
ss << "UNRESOLVED ADDR 0x" << hex << ip;
node->info.funcname = strdup(ss.str().c_str());
} else {
node->info.funcname = strdup(info.dli_sname);
}
Expand Down
26 changes: 21 additions & 5 deletions src/apex/apex_preload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,31 @@ static int (*main_real)(int, char**, char**);
int apex_preload_main(int argc, char** argv, char** envp) {
// FIRST! check to see if this is a bash script. if so, DO NOTHING
size_t len{strlen(argv[0])};
if (len > 4 && strncmp(argv[0] + (len - 4), "bash", 4) == 0) {
// little lambda for making sure we wrap the right executable
const auto validate_argv0 = [&](const char * needle){
size_t needle_len{strlen(needle)};
if (len > needle_len &&
(strncmp(argv[0] + (len - needle_len), needle, needle_len)) == 0) {
fputs("zs: skipping ", stderr);
fputs(argv[0], stderr);
fputs("!\n", stderr);
return true;
}
return false;
};
if (validate_argv0("bash")) {
return main_real(argc, argv, envp);
}
// Next! check to see if this is a [t]csh script. if so, DO NOTHING
else if (validate_argv0("csh")) {
return main_real(argc, argv, envp);
}
// FIRST! check to see if this is a [t]csh script. if so, DO NOTHING
if (len > 3 && strncmp(argv[0] + (len - 3), "csh", 3) == 0) {
// Then! check to see if this is gdb. if so, DO NOTHING (should get caught by the apex_exec script though)
else if (validate_argv0("gdb")) {
return main_real(argc, argv, envp);
}
// FIRST! check to see if this is gdb. if so, DO NOTHING (should get caught by the apex_exec script though)
if (len > 3 && strncmp(argv[0] + (len - 3), "gdb", 3) == 0) {
// finally! check for a fork from cuda-gdb
else if (validate_argv0("NvDebugAgent")) {
return main_real(argc, argv, envp);
}
// prevent re-entry
Expand Down
48 changes: 33 additions & 15 deletions src/apex/taskstubs_implementation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ maptype& getMyMap(void) {
}

void safePrint(const char * format, tasktimer_guid_t guid) {
return;
std::scoped_lock lock{mtx};
printf("%lu %s GUID %lu\n", apex::thread_instance::get_id(), format, guid);
return;
}

void safeInsert(
Expand Down Expand Up @@ -93,7 +93,6 @@ extern "C" {
const uint64_t parent_count) {
static bool& over = apex::get_program_over();
if (over) return nullptr;
// TODO: need to handle multiple parents!
// need to look up the parent shared pointers?
std::vector<std::shared_ptr<apex::task_wrapper>> parent_tasks;
for (uint64_t i = 0 ; i < parent_count ; i++) {
Expand All @@ -103,6 +102,7 @@ extern "C" {
}
// if no name, use address
if (timer_name == nullptr || strlen(timer_name) == 0) {
printf("Null name for timer: %p\n", function_address);
if (parent_count > 0) {
auto task = apex::new_task(
(apex_function_address)function_address,
Expand Down Expand Up @@ -153,7 +153,7 @@ extern "C" {
// TODO: capture the execution space, somehow...a new task?
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
apex::start(apex_timer);
apex::start(apex_timer);
}
}
void tasktimer_yield_impl(
Expand All @@ -162,7 +162,7 @@ extern "C" {
if (over) return;
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
apex::yield(apex_timer);
apex::yield(apex_timer);
}
}
void tasktimer_resume_impl(
Expand All @@ -172,23 +172,23 @@ extern "C" {
MAP_TASK(timer, apex_timer);
// TODO: why no resume function for task_wrapper objects?
if (apex_timer != nullptr) {
apex::start(apex_timer);
apex::start(apex_timer);
}
}
void tasktimer_stop_impl(
tasktimer_timer_t timer) {
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
apex::stop(apex_timer);
apex::stop(apex_timer);
}
}
void tasktimer_destroy_impl(
tasktimer_timer_t timer) {
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
// TODO: need to handle the destroy event somehow.
// definitely need to remove it from the local map.
safeErase(apex_timer->guid);
// TODO: need to handle the destroy event somehow.
// definitely need to remove it from the local map.
safeErase(apex_timer->guid);
}
}
void tasktimer_add_parents_impl (
Expand All @@ -197,21 +197,39 @@ extern "C" {
// TODO: need to handle the add parents event
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
APEX_UNUSED(apex_timer);
for (uint64_t i = 0 ; i < parent_count ; i++) {
auto tmp = safeLookup(parents[i]);
if (tmp != nullptr) {
// add the parent to the child
apex_timer->parents.push_back(tmp);
}
}
// update the child tree
if (apex::apex_options::use_tasktree_output() ||
apex::apex_options::use_hatchet_output()) {
apex_timer->assign_heritage();
}
}
APEX_UNUSED(parents);
APEX_UNUSED(parent_count);
}
void tasktimer_add_children_impl(
tasktimer_timer_t timer,
const tasktimer_guid_t* children, const uint64_t child_count) {
// TODO: need to handle the add children event
MAP_TASK(timer, apex_timer);
if (apex_timer != nullptr) {
APEX_UNUSED(apex_timer);
for (uint64_t i = 0 ; i < child_count ; i++) {
auto tmp = safeLookup(children[i]);
if (tmp != nullptr) {
// add the parent to the child
tmp->parents.push_back(apex_timer);
// update the child tree
if (apex::apex_options::use_tasktree_output() ||
apex::apex_options::use_hatchet_output()) {
tmp->assign_heritage();
}
}
}
}
APEX_UNUSED(children);
APEX_UNUSED(child_count);
}

void timerStack(
Expand Down
5 changes: 4 additions & 1 deletion src/unit_tests/C++/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ set(example_programs
apex_malloc
apex_std_thread
apex_multiple_parents
apex_taskstubs
${APEX_OPENMP_TEST}
)
#apex_set_thread_cap
Expand Down Expand Up @@ -83,10 +84,12 @@ foreach(example_program ${example_programs})
set(sources ${example_program}.cpp)
source_group("Source Files" FILES ${sources})
add_executable("${example_program}_cpp" ${sources})
target_link_libraries ("${example_program}_cpp" apex ${LIBS})
target_link_libraries ("${example_program}_cpp" apex ${LIBS} timer_plugin)
if (BUILD_STATIC_EXECUTABLES)
set_target_properties("${example_program}_cpp" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1)
endif()
# This is needed to make sure local symbols are exported and we can dladdr them
set_property(TARGET "${example_program}_cpp" PROPERTY ENABLE_EXPORTS ON)
add_dependencies ("${example_program}_cpp" apex)
add_dependencies (tests "${example_program}_cpp")
add_test ("test_${example_program}_cpp" "${example_program}_cpp")
Expand Down
2 changes: 1 addition & 1 deletion src/unit_tests/C++/apex_multiple_parents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ int child (int in, std::shared_ptr< apex::task_wrapper > this_task) {
return in;
}

int main(int argc, char *argv[]) {
int main([[maybe_unused]] int argc, [[maybe_unused]] char *argv[]) {
int comm_rank = 0;
int comm_size = 1;
#ifdef APEX_ENABLE_MPI
Expand Down
Loading

0 comments on commit 1f45a22

Please sign in to comment.