diff --git a/echion/coremodule.cc b/echion/coremodule.cc index a1ccdf09..255c6a86 100644 --- a/echion/coremodule.cc +++ b/echion/coremodule.cc @@ -54,7 +54,7 @@ static void do_where(std::ostream& stream) auto interleave_success = interleave_stacks(); if (!interleave_success) { - std::cerr << "could not interleave stacks" << std::endl; + // std::cerr <<"could not interleave stacks" << std::endl; return; } @@ -131,7 +131,7 @@ static inline void _start() do_where(pipe); else - std::cerr << "Failed to open pipe " << pipe_name << std::endl; + // std::cerr <<"Failed to open pipe " << pipe_name << std::endl; running = 0; @@ -178,6 +178,7 @@ static inline void _stop() const std::lock_guard guard(thread_info_map_lock); thread_info_map.clear(); + string_table.dump(std::cerr); string_table.clear(); } diff --git a/echion/danger.h b/echion/danger.h index 5bffa4ca..72c12be7 100644 --- a/echion/danger.h +++ b/echion/danger.h @@ -64,7 +64,7 @@ struct ThreadAltStack { void* mem = mmap(nullptr, kAltStackSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - std::cerr << "Failed to allocate alt stack. Memory copying may not work." << " Error: " << strerror(errno) << std::endl; + // std::cerr <<"Failed to allocate alt stack. Memory copying may not work." << " Error: " << strerror(errno) << std::endl; return -1; } @@ -73,7 +73,7 @@ struct ThreadAltStack { ss.ss_size = kAltStackSize; ss.ss_flags = 0; if (sigaltstack(&ss, nullptr) != 0) { - std::cerr << "Failed to set alt stack. Memory copying may not work." << " Error: " << strerror(errno) << std::endl; + // std::cerr <<"Failed to set alt stack. Memory copying may not work." 
<< " Error: " << strerror(errno) << std::endl; return -1; } diff --git a/echion/render.cc b/echion/render.cc index 0ffd6a0c..00c1eb1b 100644 --- a/echion/render.cc +++ b/echion/render.cc @@ -7,7 +7,7 @@ void WhereRenderer::render_frame(Frame& frame) auto maybe_name_str = string_table.lookup(frame.name); if (!maybe_name_str) { - std::cerr << "could not get name for render_frame" << std::endl; + // std::cerr <<"could not get name for render_frame" << std::endl; return; } const auto& name_str = maybe_name_str->get(); @@ -16,7 +16,7 @@ void WhereRenderer::render_frame(Frame& frame) auto maybe_filename_str = string_table.lookup(frame.filename); if (!maybe_filename_str) { - std::cerr << "could not get filename for render_frame" << std::endl; + // std::cerr <<"could not get filename for render_frame" << std::endl; return; } const auto& filename_str = maybe_filename_str->get(); diff --git a/echion/render.h b/echion/render.h index 8f7afde7..12258757 100644 --- a/echion/render.h +++ b/echion/render.h @@ -210,7 +210,7 @@ class MojoRenderer : public RendererInterface output.open(std::getenv("ECHION_OUTPUT")); if (!output.is_open()) { - std::cerr << "Failed to open output file " << std::getenv("ECHION_OUTPUT") << std::endl; + // std::cerr <<"Failed to open output file " << std::getenv("ECHION_OUTPUT") << std::endl; return ErrorKind::RendererError; } diff --git a/echion/stacks.h b/echion/stacks.h index c531a6dc..4f5880d3 100644 --- a/echion/stacks.h +++ b/echion/stacks.h @@ -281,7 +281,7 @@ static Result interleave_stacks(FrameStack& python_stack) { // We expected a Python frame but we found none, so we report // the native frame instead. - std::cerr << "Expected Python frame(s), found none!" << std::endl; + // std::cerr <<"Expected Python frame(s), found none!" 
<< std::endl; interleaved_stack.push_front(native_frame); } else @@ -311,7 +311,7 @@ static Result interleave_stacks(FrameStack& python_stack) if (p != python_stack.rend()) { - std::cerr << "Python stack not empty after interleaving!" << std::endl; + // std::cerr <<"Python stack not empty after interleaving!" << std::endl; while (p != python_stack.rend()) interleaved_stack.push_front(*p++); } diff --git a/echion/strings.h b/echion/strings.h index 44231151..2f6910ca 100644 --- a/echion/strings.h +++ b/echion/strings.h @@ -229,6 +229,15 @@ class StringTable : public std::unordered_map this->emplace(UNKNOWN, ""); }; + void dump(std::ostream& os) const + { + os << "String table:" << std::endl; + for (const auto& [key, value] : *this) + { + os << " " << key << ": " << value << std::endl; + } + } + private: mutable std::mutex table_lock; }; diff --git a/echion/tasks.h b/echion/tasks.h index 1ed82bd5..e89db61b 100644 --- a/echion/tasks.h +++ b/echion/tasks.h @@ -202,7 +202,7 @@ class TaskInfo } [[nodiscard]] static Result current(PyObject*); - inline size_t unwind(FrameStack&, size_t& upper_python_stack_size); + inline Result unwind(FrameStack&, size_t& upper_python_stack_size); }; inline std::unordered_map<PyObject*, PyObject*> task_link_map; @@ -360,39 +360,54 @@ inline std::vector<std::unique_ptr<StackInfo>> current_tasks; // ---------------------------------------------------------------------------- -inline size_t TaskInfo::unwind(FrameStack& stack, size_t& upper_python_stack_size) +inline Result TaskInfo::unwind(FrameStack& stack, size_t& upper_python_stack_size) { + // std::cerr <<"Unwinding the Task" << std::endl; // TODO: Check for running task.
std::stack<PyObject*> coro_frames; // Unwind the coro chain for (auto coro = this->coro.get(); coro != NULL; coro = coro->await.get()) { - if (coro->frame != NULL) + if (coro->frame != NULL) { coro_frames.push(coro->frame); + } } // Total number of frames added to the Stack size_t count = 0; // Unwind the coro frames + // std::cerr <<"Unwinding the coroutine frames, we have " << coro_frames.size() << " coro frames" << std::endl; while (!coro_frames.empty()) { PyObject* frame = coro_frames.top(); coro_frames.pop(); auto new_frames = unwind_frame(frame, stack); + // std::cerr <<"Unwound " << new_frames << " frames" << (new_frames > 1 ? " more than one" : "") << std::endl; + for (size_t i = 0; i < new_frames; i++) { + // std::cerr <<" " << i << ": " << string_table.lookup(stack[stack.size() - i - 1].get().name)->get() << std::endl; + } // If this is the first Frame being unwound (we have not added any Frames to the Stack yet), // use the number of Frames added to the Stack to determine the size of the upper Python stack. if (count == 0) { // The first Frame is the coroutine Frame, so the Python stack size is the number of Frames - 1 upper_python_stack_size = new_frames - 1; + // std::cerr <<"Determining the size of the upper Python stack: " << upper_python_stack_size << std::endl; + + if (this->is_on_cpu && upper_python_stack_size == 0) { + // std::cerr <<"Task is on CPU, but the upper Python stack size is 0. This is not possible." << std::endl; + return ErrorKind::TaskInfoError; + } // Remove the Python Frames from the Stack (they will be added back later) // We cannot push those Frames now because otherwise they would be added once per Task, // we only want to add them once per Leaf Task, and on top of all non-leaf Tasks.
+ // std::cerr <<"Removing " << upper_python_stack_size << " frames from the Stack" << std::endl; for (size_t i = 0; i < upper_python_stack_size; i++) { + // std::cerr <<" - " << string_table.lookup(stack[stack.size() - 1].get().name)->get() << std::endl; stack.pop_back(); } } diff --git a/echion/threads.h b/echion/threads.h index 30397675..54a132ba 100644 --- a/echion/threads.h +++ b/echion/threads.h @@ -235,6 +235,43 @@ inline void ThreadInfo::unwind(PyThreadState* tstate) // ---------------------------------------------------------------------------- inline Result ThreadInfo::unwind_tasks() { + // std::cerr <<"===== Unwinding tasks ==" << std::endl; + // std::cerr <<"Python stack:" << std::endl; + for (size_t i = 0; i < python_stack.size(); i++) + { + // std::cerr <<" " << i << ": " << string_table.lookup(python_stack[i].get().name)->get() << std::endl; + } + + // Check if the Python stack contains "_run". + // To avoid having to do string comparisons every time we unwind Tasks, we keep track + // of the cache key of the "_run" Frame. + static std::optional frame_cache_key; + bool expect_at_least_one_running_task = false; + if (!frame_cache_key) { + for (size_t i = 0; i < python_stack.size(); i++) { + const auto& frame = python_stack[i].get(); + const auto& name = string_table.lookup(frame.name)->get(); + if (name.size() >= 4 && name.rfind("_run") == name.size() - 4) { + + // Although Frames are stored in an LRUCache, the cache key is ALWAYS the same + // even if the Frame gets evicted from the cache. + // This means we can keep the cache key and re-use it to determine + // whether we see the "_run" Frame in the Python stack. 
+ frame_cache_key = frame.cache_key; + expect_at_least_one_running_task = true; + break; + } + } + } else { + for (size_t i = 0; i < python_stack.size(); i++) { + const auto& frame = python_stack[i].get(); + if (frame.cache_key == *frame_cache_key) { + expect_at_least_one_running_task = true; + break; + } + } + } + std::vector leaf_tasks; std::unordered_set parent_tasks; std::unordered_map waitee_map; // Indexed by task origin @@ -247,6 +284,23 @@ inline Result ThreadInfo::unwind_tasks() } auto all_tasks = std::move(*maybe_all_tasks); + + bool saw_at_least_one_running_task = false; + std::string running_task_name; + for (const auto& task_ref : all_tasks) { + const auto& task = task_ref.get(); + if (task->is_on_cpu) { + running_task_name = string_table.lookup(task->name)->get(); + saw_at_least_one_running_task = true; + break; + } + } + + if (saw_at_least_one_running_task != expect_at_least_one_running_task) { + // std::cerr <<"SKIPSKIP because of inconsistent task state, expected: " << expect_at_least_one_running_task << " saw: " << saw_at_least_one_running_task << " running task: " << running_task_name << std::endl; + return ErrorKind::TaskInfoError; + } + { std::lock_guard lock(task_link_map_lock); @@ -284,6 +338,7 @@ inline Result ThreadInfo::unwind_tasks() { // This task is not running, so we skip it if we are // interested in just CPU time. + // std::cerr <<"SKIPPING TASK " << string_table.lookup(task->name)->get() << " because it is not on CPU" << std::endl; continue; } leaf_tasks.push_back(std::ref(*task)); @@ -295,6 +350,71 @@ inline Result ThreadInfo::unwind_tasks() return ((a.get().is_on_cpu ? 0 : 1) < (b.get().is_on_cpu ? 
0 : 1)); }); + // std::cerr <<"Leaf tasks:" << std::endl; + // for (const auto& leaf_task : leaf_tasks) { + // std::cerr <<" " << string_table.lookup(leaf_task.get().name)->get() << " on cpu: " << leaf_task.get().is_on_cpu << std::endl; + // } + + // Print a tree of Task dependencies + // std::cerr <<"Task tree:" << std::endl; + { + // Build parent->children maps + std::unordered_map<PyObject*, std::vector<PyObject*>> children_map; // parent -> children + std::unordered_set<PyObject*> has_parent; + + // From gather links + { + std::lock_guard lock(task_link_map_lock); + for (const auto& kv : task_link_map) { + children_map[kv.second].push_back(kv.first); + has_parent.insert(kv.first); + } + } + + // From waiter relationships (waiter is the parent, current task is child) + for (const auto& task : all_tasks) { + if (task->waiter) { + children_map[task->waiter->origin].push_back(task->origin); + has_parent.insert(task->origin); + } + } + + // Find roots (tasks with no parent) + std::vector<PyObject*> roots; + for (const auto& task : all_tasks) { + if (has_parent.find(task->origin) == has_parent.end()) { + roots.push_back(task->origin); + } + } + + // Recursive print function + std::function<void(PyObject*, const std::string&, bool)> print_tree; + print_tree = [&](PyObject* task_origin, const std::string& prefix, bool is_last) { + auto it = origin_map.find(task_origin); + if (it == origin_map.end()) return; + + const auto& task = it->second.get(); + auto maybe_name = string_table.lookup(task.name); + std::string name_str = maybe_name ? maybe_name->get() : ""; + + // std::cerr << prefix << name_str << (task.is_on_cpu ? " [on CPU]" : "") << std::endl; + + auto children_it = children_map.find(task_origin); + if (children_it != children_map.end()) { + const auto& children = children_it->second; + for (size_t i = 0; i < children.size(); i++) { + print_tree(children[i], prefix + (is_last ?
" " : "│ "), i == children.size() - 1); + } + } + }; + + for (size_t i = 0; i < roots.size(); i++) { + print_tree(roots[i], "", i == roots.size() - 1); + } + } + + + // The size of the "pure Python" stack (before asyncio Frames), computed later by TaskInfo::unwind size_t upper_python_stack_size = 0; // Unused variable, will be used later by TaskInfo::unwind @@ -303,6 +423,7 @@ inline Result ThreadInfo::unwind_tasks() bool on_cpu_task_seen = false; for (auto& leaf_task : leaf_tasks) { + // std::cerr <<"== Unwinding leaf task: " << string_table.lookup(leaf_task.get().name)->get() << std::endl; auto stack_info = std::make_unique(leaf_task.get().name, leaf_task.get().is_on_cpu); on_cpu_task_seen = on_cpu_task_seen || leaf_task.get().is_on_cpu; @@ -310,9 +431,16 @@ inline Result ThreadInfo::unwind_tasks() for (auto current_task = leaf_task;;) { auto& task = current_task.get(); + // std::cerr <<"= Unwinding task: " << string_table.lookup(task.name)->get() << " on cpu: " << task.is_on_cpu << std::endl; // The task_stack_size includes both the coroutines frames and the "upper" Python synchronous frames - size_t task_stack_size = task.unwind(stack, task.is_on_cpu ? upper_python_stack_size : unused); + auto maybe_task_stack_size = task.unwind(stack, task.is_on_cpu ? upper_python_stack_size : unused); + if (!maybe_task_stack_size) { + // std::cerr <<"SKIPSKIP inconsistent Task " << string_table.lookup(task.name)->get() << std::endl; + return ErrorKind::TaskInfoError; + } + + size_t task_stack_size = std::move(*maybe_task_stack_size); if (task.is_on_cpu) { // Get the "bottom" part of the Python synchronous Stack, that is to say the @@ -321,15 +449,25 @@ inline Result ThreadInfo::unwind_tasks() // subtract the number of Frames in the "upper Python stack" (asyncio machinery + sync entrypoint) // This gives us [outermost coroutine, ... , innermost coroutine, outermost sync function, ... , innermost sync function] size_t frames_to_push = (python_stack.size() > task_stack_size) ? 
python_stack.size() - task_stack_size : 0; + // std::cerr <<"Task is on CPU, pushing " << frames_to_push << " frames" << std::endl; for (size_t i = 0; i < frames_to_push; i++) { const auto& python_frame = python_stack[frames_to_push - i - 1]; + // std::cerr <<" " << i << ": " << string_table.lookup(python_frame.get().name)->get() << std::endl; stack.push_front(python_frame); } + } else { + // std::cerr <<"Task is not on CPU..." << std::endl; } // Add the task name frame stack.push_back(Frame::get(task.name)); + // std::cerr <<"Pushed task name frame: " << string_table.lookup(task.name)->get() << std::endl; + + // std::cerr <<"Stack at the end for that Task" << std::endl; + for (size_t i = 0; i < stack.size(); i++) { + // std::cerr <<" " << i << ": " << string_table.lookup(stack[i].get().name)->get() << std::endl; + } // Get the next task in the chain PyObject* task_origin = task.origin; @@ -357,11 +495,22 @@ inline Result ThreadInfo::unwind_tasks() // Finish off with the remaining thread stack // If we have seen an on-CPU Task, then upper_python_stack_size will be set and will include the sync entry point // and the asyncio machinery Frames. Otherwise, we are in `select` (idle) and we should push all the Frames. + // std::cerr <<"Pushing the remaining Thread stack" << std::endl; + // for (size_t i = 0; i < python_stack.size() - (on_cpu_task_seen ? upper_python_stack_size : python_stack.size()); i++) { + // const auto& python_frame = python_stack[i]; + // // std::cerr <<" Skipped: " << i << ": " << string_table.lookup(python_frame.get().name)->get() << std::endl; + // } for (size_t i = python_stack.size() - (on_cpu_task_seen ? 
upper_python_stack_size : python_stack.size()); i < python_stack.size(); i++) { const auto& python_frame = python_stack[i]; + // std::cerr <<" Pushed: " << i << ": " << string_table.lookup(python_frame.get().name)->get() << std::endl; stack.push_back(python_frame); } + // std::cerr <<"Stack after pushing the remaining Thread stack" << std::endl; + for (size_t i = 0; i < stack.size(); i++) { + // std::cerr <<" " << i << ": " << string_table.lookup(stack[i].get().name)->get() << std::endl; + } + current_tasks.push_back(std::move(stack_info)); } diff --git a/echion/vm.h b/echion/vm.h index b8fbefc3..2221a872 100644 --- a/echion/vm.h +++ b/echion/vm.h @@ -143,8 +143,7 @@ class VmReader instance = VmReader::create(1024 * 1024); // A megabyte? if (!instance) { - std::cerr << "Failed to initialize VmReader with buffer size " << instance->sz - << std::endl; + std::cerr << "Failed to initialize VmReader" << std::endl; return nullptr; } } @@ -240,7 +239,7 @@ __attribute__((constructor)) inline void init_safe_copy() return; } - std::cerr << "Failed to initialize segv catcher. Using process_vm_readv instead." << std::endl; + // std::cerr <<"Failed to initialize segv catcher. Using process_vm_readv instead." << std::endl; } char src[128]; @@ -295,7 +294,7 @@ __attribute__((constructor)) inline void init_safe_copy() return; } - std::cerr << "Failed to initialize segv catcher. Using process_vm_readv instead." << std::endl; + // std::cerr <<"Failed to initialize segv catcher. Using process_vm_readv instead." << std::endl; } } #endif // if defined PL_DARWIN