diff --git a/src/perf_tools/common.cr b/src/perf_tools/common.cr index 6d7aafc..b09ec68 100644 --- a/src/perf_tools/common.cr +++ b/src/perf_tools/common.cr @@ -94,9 +94,6 @@ module PerfTools end struct Exception::CallStack - def initialize(*, __callstack @callstack : Array(Void*)) - end - {% if flag?(:win32) %} {% if flag?(:interpreted) %} @[Primitive(:interpreter_call_stack_unwind)] {% end %} def self.unwind_to(buf : Slice(Void*)) : Nil @@ -104,7 +101,7 @@ struct Exception::CallStack context = Pointer(LibC::CONTEXT).malloc(1) context.value.contextFlags = LibC::CONTEXT_FULL LibC.RtlCaptureContext(context) - + # unlike DWARF, this is required on Windows to even be able to produce # correct stack traces, so we do it here but not in `libunwind.cr` load_debug_info @@ -180,4 +177,83 @@ struct Exception::CallStack LibUnwind.backtrace(backtrace_fn, pointerof(callstack).as(Void*)) end {% end %} + + def self.__perftools_decode_backtrace(stack : Slice(Void*)) : Array(String) + frames = [] of String + stack.each do |ip| + if frame = __perftools_decode_backtrace_frame(ip, ENV["CRYSTAL_CALLSTACK_FULL_INFO"]? == "1") + frames << frame + end + end + frames + end + + # TODO: reduce duplication with #decode_backtrace in stdlib + def self.__perftools_decode_backtrace_frame(ip, show_full_info) : String? + pc = CallStack.decode_address(ip) + file, line_number, column_number = CallStack.decode_line_number(pc) + + if file && file != "??" 
+ return if @@skip.includes?(file) + + # Turn to relative to the current dir, if possible + if current_dir = CURRENT_DIR + if rel = Path[file].relative_to?(current_dir) + rel = rel.to_s + file = rel unless rel.starts_with?("..") + end + end + + file_line_column = file + unless line_number == 0 + file_line_column = "#{file_line_column}:#{line_number}" + file_line_column = "#{file_line_column}:#{column_number}" unless column_number == 0 + end + end + + if name = CallStack.decode_function_name(pc) + function = name + elsif frame = CallStack.decode_frame(ip) + _, function, file = frame + # Crystal methods (their mangled name) start with `*`, so + # we remove that to have less clutter in the output. + function = function.lchop('*') + else + function = "??" + end + + if file_line_column + if show_full_info && (frame = CallStack.decode_frame(ip)) + _, sname, _ = frame + line = "#{file_line_column} in '#{sname}'" + else + line = "#{file_line_column} in '#{function}'" + end + else + if file == "??" && function == "??" + line = "???" + else + line = "#{file} in '#{function}'" + end + end + + if show_full_info + line = "#{line} at 0x#{ip.address.to_s(16)}" + end + + line + end + + def self.__perftools_print_frame(ip : Void*) : Nil + repeated_frame = RepeatedFrame.new(ip) + + {% if flag?(:win32) && !flag?(:gnu) %} + # TODO: can't merely call #print_frame because the UTF-16 to UTF-8 + # conversion is allocating strings, and it's probably a bad idea to + # allocate while the world is stopped. + Crystal::System.print_error "[%p] ???", repeated_frame.ip + {% else %} + print_frame(repeated_frame) + {% end %} + end end diff --git a/src/perf_tools/fiber_trace.cr b/src/perf_tools/fiber_trace.cr index c6b84e7..49cd3b4 100644 --- a/src/perf_tools/fiber_trace.cr +++ b/src/perf_tools/fiber_trace.cr @@ -2,15 +2,6 @@ require "./common" # In-memory tracking of all existing fibers in the running program. 
module PerfTools::FiberTrace - # :nodoc: - class_getter spawn_stack = {} of Fiber => Array(Void*) - - # :nodoc: - class_getter yield_stack = {} of Fiber => Array(Void*) - - # :nodoc: - class_getter lock = Thread::Mutex.new - {% begin %} # The maximum number of stack frames shown for `FiberTrace.log_fibers` and # `FiberTrace.pretty_log_fibers`. @@ -99,23 +90,23 @@ module PerfTools::FiberTrace # # NOTE: The main fiber of each thread is not shown. def self.log_fibers(io : IO) : Nil - lock.synchronize do - io << spawn_stack.size << '\n' - spawn_stack.each do |fiber, stack| - io << fiber.name << '\n' - - s = Exception::CallStack.new(__callstack: stack).printable_backtrace - io << s.size << '\n' - s.each { |frame| io << frame << '\n' } - - if yield_stack = self.yield_stack[fiber]? - y = Exception::CallStack.new(__callstack: yield_stack).printable_backtrace - io << y.size << '\n' - y.each { |frame| io << frame << '\n' } - else - io << '0' << '\n' - end - end + fibers = [] of Fiber + Fiber.each { |fiber| fibers << fiber } + + io << fibers.size << '\n' + + fibers.each do |fiber| + next if fiber.__spawn_stack.empty? + + io << fiber.name << '\n' + + s = Exception::CallStack.__perftools_decode_backtrace(fiber.__spawn_stack) + io << s.size << '\n' + s.each { |frame| io << frame << '\n' } + + y = Exception::CallStack.__perftools_decode_backtrace(fiber.__yield_stack) + io << y.size << '\n' + y.each { |frame| io << frame << '\n' } end end @@ -145,110 +136,131 @@ module PerfTools::FiberTrace # # NOTE: The main fiber of each thread is not shown. def self.pretty_log_fibers(io : IO) : Nil - lock.synchronize do - uniqs = spawn_stack - .map { |fiber, stack| {fiber.name, stack, yield_stack[fiber]?} } - .group_by { |_, s, y| {s, y} } - .transform_values(&.map { |fiber, _, _| fiber }) - .to_a - .sort_by! 
{ |(s, y), names| {-names.size, s, y || Array(Void*).new} } - - io.puts "| Count | Fibers | Spawn stack | Yield stack |" - io.puts "|------:|:-------|:------------|:------------|" - uniqs.each do |(s, y), names| - s = Exception::CallStack.new(__callstack: s).printable_backtrace - y = y.try { |y| Exception::CallStack.new(__callstack: y).printable_backtrace } - - io << "| " - io << names.size - io << " | " - names.compact.join(io, ' ') { |name| PerfTools.md_code_span(io, name) } - io << " | " - s.join(io, "
") { |frame| PerfTools.md_code_span(io, frame) } - io << " | " - if y - y.join(io, "
") { |frame| PerfTools.md_code_span(io, frame) } - else - io << "*N/A*" - end - io << " |\n" + fibers = [] of Fiber + Fiber.each { |fiber| fibers << fiber } + + uniqs = fibers + .map { |fiber| {fiber.name, fiber.__spawn_stack, fiber.__yield_stack} } + .group_by { |_, s, y| {s, y} } + .transform_values(&.map { |fiber, _, _| fiber }) + .to_a + .sort_by! { |(s, y), names| {-names.size, s, y} } + + io.puts "| Count | Fibers | Spawn stack | Yield stack |" + io.puts "|------:|:-------|:------------|:------------|" + uniqs.each do |(s_, y_), names| + s = Exception::CallStack.__perftools_decode_backtrace(s_) + y = Exception::CallStack.__perftools_decode_backtrace(y_) + + io << "| " + io << names.size + io << " | " + names.compact.join(io, ' ') { |name| PerfTools.md_code_span(io, name) } + io << " | " + s.join(io, "
") { |frame| PerfTools.md_code_span(io, frame) } + io << " | " + if y.size > 0 + y.join(io, "
") { |frame| PerfTools.md_code_span(io, frame) } + else + io << "*N/A*" end + io << " |\n" end end # :nodoc: - macro track_fiber(action, current_fiber) - %stack = Array.new(PerfTools::FiberTrace::STACK_DEPTH + PerfTools::FiberTrace::STACK_SKIP_{{action.upcase.id}}, Pointer(Void).null) - Exception::CallStack.unwind_to(Slice.new(%stack.to_unsafe, %stack.size)) - %stack.truncate(PerfTools::FiberTrace::STACK_SKIP_{{action.upcase.id}}..) - while %stack.last? == Pointer(Void).null - %stack.pop - end - PerfTools::FiberTrace.lock.synchronize do - PerfTools::FiberTrace.{{action.id}}_stack[{{current_fiber}}] = %stack + def self.caller_stack(skip) + size = skip + PerfTools::FiberTrace::STACK_DEPTH + + ptr = GC.malloc_atomic(sizeof(Void*) * size).as(Void**) + ptr.clear(size) + slice = Slice(Void*).new(ptr, size) + + Exception::CallStack.unwind_to(slice) + + stop = -1 + while slice[stop].null? + stop -= 1 end + + slice[0..stop] end end class Fiber - def initialize(@name : String? = nil, &@proc : ->) - previous_def(name, &proc) - PerfTools::FiberTrace.track_fiber(:spawn, self) + # in theory: the slices should always be of exactly DEPTH+SKIP size, so any + # thread can update the slice pointer at any time, the size will never change; + # dereferencing the pointer at any index is always safe and will never raise; + # this allows us to skip any thread synchronization... + # + # in practice: we must access constants to know the actual sizes but accessing + # constants requires initializing Fiber.current (see crystal/once) for the + # main thread which prevents use from knowning the actual size... + # + # also: we pre initialize to an empty slice to avoid a compilation error with + # the original #initialize in src/fiber.cr + # + # solution: check the slices' pointer before dereferencing the slices (if size + # is updated before pointer); also check for size > 0 to avoid an IndexError + # (if size is updated after pointer). 
+ @__spawn_stack = Slice(Void*).new(Pointer(Void*).null, 0) + @__yield_stack = Slice(Void*).new(Pointer(Void*).null, 0) + + {% begin %} + def initialize( + name : String?, + {% if Fiber.has_constant?(:Stack) %}stack : Stack,{% end %} + {% if flag?(:execution_context) %}execution_context : ExecutionContext = ExecutionContext.current,{% end %} + &proc : -> + ) + @__spawn_stack = PerfTools::FiberTrace.caller_stack(PerfTools::FiberTrace::STACK_SKIP_SPAWN) + previous_def( + name, + {% if Fiber.has_constant?(:Stack) %}stack,{% end %} + {% if flag?(:execution_context) %}execution_context,{% end %} + &proc + ) end + {% end %} - def self.inactive(fiber : Fiber) - PerfTools::FiberTrace.lock.synchronize do - PerfTools::FiberTrace.spawn_stack.delete(fiber) - PerfTools::FiberTrace.yield_stack.delete(fiber) + def __spawn_stack + if @__spawn_stack.size > 0 && !@__spawn_stack.to_unsafe.null? + @__spawn_stack[PerfTools::FiberTrace::STACK_SKIP_SPAWN..] + else + Slice.new(Pointer(Void*).null, 0) end - previous_def end - # crystal-lang/crystal#13701 - {% if compare_versions(Crystal::VERSION, "1.10.0") < 0 %} - def run - GC.unlock_read - @proc.call - rescue ex - if name = @name - STDERR.print "Unhandled exception in spawn(name: #{name}): " - else - STDERR.print "Unhandled exception in spawn: " - end - ex.inspect_with_backtrace(STDERR) - STDERR.flush - ensure - {% if flag?(:preview_mt) %} - Crystal::Scheduler.enqueue_free_stack @stack - {% elsif flag?(:interpreted) %} - # For interpreted mode we don't need a new stack, the stack is held by the interpreter - {% else %} - Fiber.stack_pool.release(@stack) - {% end %} - - # Remove the current fiber from the linked list - Fiber.inactive(self) - - # Delete the resume event if it was used by `yield` or `sleep` - @resume_event.try &.free - @timeout_event.try &.free - @timeout_select_action = nil - - @alive = false - Crystal::Scheduler.reschedule + def __yield_stack + if @__yield_stack.size > 0 && !@__yield_stack.to_unsafe.null? 
+ @__yield_stack[PerfTools::FiberTrace::STACK_SKIP_YIELD..] + else + Slice.new(Pointer(Void*).null, 0) end - {% end %} -end + end -class Crystal::Scheduler - protected def resume(fiber : Fiber) : Nil - current_fiber = {% if Crystal::Scheduler.instance_vars.any? { |x| x.name == :thread.id } %} - # crystal >= 1.13 - @thread.current_fiber - {% else %} - @current - {% end %} - PerfTools::FiberTrace.track_fiber(:yield, current_fiber) - previous_def + def __yield_stack=(@__yield_stack) end end + +{% if flag?(:execution_context) %} + module Fiber::ExecutionContext::Scheduler + def swapcontext(fiber : Fiber) + Fiber.current.__yield_stack = PerfTools::FiberTrace.caller_stack(PerfTools::FiberTrace::STACK_SKIP_YIELD) + previous_def + end + end +{% else %} + class Crystal::Scheduler + protected def resume(fiber : Fiber) : Nil + current_fiber = {% if Crystal::Scheduler.instance_vars.any? { |x| x.name == :thread.id } %} + # crystal >= 1.13 + @thread.current_fiber + {% else %} + @current + {% end %} + current_fiber.__yield_stack = PerfTools::FiberTrace.caller_stack(PerfTools::FiberTrace::STACK_SKIP_YIELD) + previous_def + end + end +{% end %} diff --git a/src/perf_tools/scheduler_trace.cr b/src/perf_tools/scheduler_trace.cr new file mode 100644 index 0000000..5b8f683 --- /dev/null +++ b/src/perf_tools/scheduler_trace.cr @@ -0,0 +1,175 @@ +{% raise "ERROR: PerfTools::SchedulerTrace require the `execution_context` compilation flag" unless flag?(:execution_context) %} + +require "./common" + +class Fiber + def status : String + if @alive + if @context.@resumable == 1 + "suspended" + else + "running" + end + else + "dead" + end + end +end + +module PerfTools::SchedulerTrace + {% if flag?(:unix) %} + # Installs a signal handler to call `.print_runtime_status` on demand. + # + # Uses `SIGUSR1` by default but you may configure another signal, for + # example `LibC::SIGRTMIN + 7`. You may also register multiple signals, one + # with fiber detail and another without, for example. 
+ def self.on(signal : Int32 = Signal::USR1.value, details : Bool = false) : Nil + if details && Fiber.current.responds_to?(:__yield_stack) + # make sure that debug info has been loaded + Exception::CallStack.load_debug_info + end + + # not using Signal#trap so the signal will be handled directly instead + # of through the event loop that may have to wait (or be blocked in + # the worst case): + action = LibC::Sigaction.new + action.sa_flags = LibC::SA_RESTART + + # can't pass closure to C function, so we register different handlers + if details + action.sa_sigaction = LibC::SigactionHandlerT.new do |_, _, _| + print_runtime_status(details: true) + end + else + action.sa_sigaction = LibC::SigactionHandlerT.new do |_, _, _| + print_runtime_status(details: false) + end + end + + LibC.sigemptyset(pointerof(action.@sa_mask)) + LibC.sigaction(signal, pointerof(action), nil) + end + {% end %} + + # Starts a thread that will call `.print_runtime_status` at every *interval* + # until the program terminates. + def self.every(interval : Time::Span, details = false) : Nil + if details && Fiber.current.responds_to?(:__yield_stack) + # make sure that debug info has been loaded + Exception::CallStack.load_debug_info + end + + Thread.new("SCHEDTRACE") do + loop do + Thread.sleep(interval) + print_runtime_status(details) + end + end + end + + # Stops the world, prints the status of all runtime schedulers, then resumes + # the world. + # + # Set `details` to true to print individual fiber details. 
+ def self.print_runtime_status(details = false) : Nil + Thread.stop_world + + Crystal::System.print_error("sched.details time=%u\n", Crystal::System::Time.ticks) + + Fiber::ExecutionContext.unsafe_each do |execution_context| + print_runtime_status(execution_context, details) + end + + Thread.start_world + end + + private def self.print_runtime_status(execution_context : Fiber::ExecutionContext::MultiThreaded, details = false) : Nil + Crystal::System.print_error("%s name=%s global_queue.size=%d\n", + execution_context.class.name, + execution_context.name, + execution_context.@global_queue.size) + + execution_context.@threads.each do |thread| + print_runtime_status(thread, details) + end + + return unless details + + Fiber.unsafe_each do |fiber| + if fiber.execution_context? == execution_context + print_runtime_status(fiber, details) + end + end + end + + private def self.print_runtime_status(execution_context : Fiber::ExecutionContext::SingleThreaded, details = false) : Nil + Crystal::System.print_error("%s name=%s global_queue.size=%d\n", + execution_context.class.name, + execution_context.name, + execution_context.@global_queue.size) + + print_runtime_status(execution_context.@thread, details) + + return unless details + + Fiber.unsafe_each do |fiber| + if fiber.execution_context? == execution_context + print_runtime_status(fiber, details) + end + end + end + + private def self.print_runtime_status(execution_context : Fiber::ExecutionContext::Isolated, details = false) : Nil + Crystal::System.print_error("%s name=%s\n", execution_context.class.name, execution_context.name) + print_runtime_status(execution_context.@thread, details = false) + end + + private def self.print_runtime_status(thread : Thread, details = false) : Nil + thread_handle = + {% if flag?(:linux) %} + Pointer(Void).new(thread.@system_handle) + {% else %} + thread.@system_handle + {% end %} + + case scheduler = thread.scheduler? 
+ when Fiber::ExecutionContext::MultiThreaded::Scheduler + Crystal::System.print_error(" Scheduler name=%s thread=%p local_queue.size=%u status=%s\n", + scheduler.name, + thread_handle, + scheduler.@runnables.size, + scheduler.status) + when Fiber::ExecutionContext::SingleThreaded + Crystal::System.print_error(" Scheduler name=%s thread=%p local_queue.size=%u status=%s\n", + scheduler.name, + thread_handle, + scheduler.@runnables.size, + scheduler.status) + when Fiber::ExecutionContext::Isolated + Crystal::System.print_error(" Scheduler name=%s thread=%p status=%s\n", + scheduler.name, + thread_handle, + scheduler.status) + end + + return unless details + + if fiber = thread.current_fiber? + Crystal::System.print_error(" Fiber %p name=%s status=running\n", fiber.as(Void*), fiber.name) + end + end + + private def self.print_runtime_status(fiber : Fiber, details = false) : Nil + Crystal::System.print_error(" Fiber %p name=%s status=%s\n", fiber.as(Void*), fiber.name, fiber.status) + + if details && (fiber.status != "running") && fiber.responds_to?(:__yield_stack) + fiber.__yield_stack[PerfTools::FiberTrace::STACK_SKIP_SPAWN..].each do |ip| + Crystal::System.print_error(" ") + Exception::CallStack.__perftools_print_frame(ip) + end + end + end + + # private def self.print_runtime_status(arg : Nil, details = false) : Nil + # end +end