Skip to content

Commit

Permalink
Add signal handler on *nix with troubleshooting and stacktrace (#1340)
Browse files Browse the repository at this point in the history
* Add signal handler on *nix with troubleshooting and stacktrace

* Add a way to segfault the app in the debug menu

* Add debug->crash menu button to trigger a stack overflow
  • Loading branch information
emilk authored Feb 17, 2023
1 parent e333e2c commit 3e4c194
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 27 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 54 additions & 12 deletions crates/re_viewer/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,23 +1514,65 @@ fn debug_menu(options: &mut AppOptions, ui: &mut egui::Ui) {

ui.separator();

#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}
ui.menu_button("Crash", |ui| {
#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}

if ui.button("panic! during unwind").clicked() {
struct PanicOnDrop {}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
panic!("Second intentional panic in Drop::drop");
}
}

if ui.button("panic! during unwind").clicked() {
struct PanicOnDrop {}
let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
panic!("Second intentional panic in Drop::drop");
if ui.button("SEGFAULT").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs

/// This is the fixed address used to generate a segfault. It's possible that
/// this address can be mapped and writable by your process, in which case a
/// crash may not occur.
#[cfg(target_pointer_width = "64")]
pub const SEGFAULT_ADDRESS: u64 = u32::MAX as u64 + 0x42;
#[cfg(target_pointer_width = "32")]
pub const SEGFAULT_ADDRESS: u32 = 0x42;

let bad_ptr: *mut u8 = SEGFAULT_ADDRESS as _;
#[allow(unsafe_code)]
// SAFETY: this is not safe. We are _trying_ to crash.
unsafe {
std::ptr::write_volatile(bad_ptr, 1);
}
}

let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}
if ui.button("Stack overflow").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs
fn recurse(data: u64) -> u64 {
let mut buff = [0u8; 256];
buff[..9].copy_from_slice(b"junk data");

let mut result = data;
for c in buff {
result += c as u64;
}

if result == 0 {
result
} else {
recurse(result) + 1
}
}

recurse(42);
}
});
}

// ---
Expand Down
5 changes: 5 additions & 0 deletions crates/rerun/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,8 @@ clap = { workspace = true, features = ["derive"] }
mimalloc = "0.1.29"
puffin_http = "0.11"
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

# Native unix dependencies:
[target.'cfg(not(any(target_arch = "wasm32", target_os = "windows")))'.dependencies]
backtrace = "0.3"
libc = "0.2"
124 changes: 124 additions & 0 deletions crates/rerun/src/crash_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/// Installs every crash handler this module provides.
///
/// The panic hook is always installed. The signal handler is only compiled in
/// (and therefore only installed) when the target is neither `wasm32` nor Windows.
pub fn install_crash_handlers() {
    install_panic_hook();

    // Single combined predicate; equivalent to stacking the two `not(..)` attributes.
    #[cfg(not(any(target_arch = "wasm32", target_os = "windows")))]
    install_signal_handler();
}

/// Chains a new panic hook in front of whatever hook is currently installed.
///
/// On panic, the previously installed hook runs first (so its usual report is
/// still produced), and then a link to the troubleshooting guide is written to
/// stderr.
fn install_panic_hook() {
    let wrapped_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |info| {
        // Let the earlier hook produce its report (callstack etc) first.
        wrapped_hook(info);

        eprintln!(
            "\nTroubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}

/// Installs `signal_handler` for a fixed set of signals.
///
/// When one of those signals arrives, the handler:
/// 1. writes a short notice (signal name + troubleshooting link) to stderr,
/// 2. tries to print a trimmed callstack,
/// 3. writes the notice again so it ends up at the bottom of the terminal,
/// 4. restores the default disposition and re-raises the signal.
#[cfg(not(target_arch = "wasm32"))]
#[cfg(not(target_os = "windows"))]
#[allow(unsafe_code)]
#[allow(clippy::fn_to_numeric_cast_any)]
fn install_signal_handler() {
    for signum in [
        libc::SIGABRT,
        libc::SIGBUS,
        libc::SIGFPE,
        libc::SIGILL,
        libc::SIGINT,
        libc::SIGSEGV,
        libc::SIGTERM,
    ] {
        // SAFETY: we're installing a signal handler.
        unsafe {
            // `libc::signal` takes the handler as a `sighandler_t`, so cast to exactly that.
            libc::signal(signum, signal_handler as libc::sighandler_t);
        }
    }

    /// The actual handler. Runs in signal context, so it is careful about what it does first.
    unsafe extern "C" fn signal_handler(signal_number: libc::c_int) {
        let signal_name = match signal_number {
            libc::SIGABRT => "SIGABRT",
            libc::SIGBUS => "SIGBUS",
            libc::SIGFPE => "SIGFPE",
            libc::SIGILL => "SIGILL",
            libc::SIGINT => "SIGINT",
            libc::SIGSEGV => "SIGSEGV",
            libc::SIGTERM => "SIGTERM",
            _ => "UNKNOWN SIGNAL",
        };

        // There are very few things that are safe to do in a signal handler,
        // but writing to stderr is one of them.
        // So we first print out what happened to stderr so we're sure that gets out,
        // then we do the unsafe things, like logging the stack trace.
        // We take care not to allocate any memory along the way.
        print_problem_and_links(signal_name);

        // Ok, we printed the most important things.
        // Let's do less important things that require memory allocations.
        // Allocating memory can lead to deadlocks if the signal
        // was triggered from the system's memory management functions.
        print_callstack();

        // We seem to have managed printing the callstack - great!
        // Then let's print the important stuff _again_ so it is visible at the bottom
        // of the user's terminal:
        print_problem_and_links(signal_name);

        // We are done!
        // Call the default signal handler (which usually terminates the app):
        // SAFETY: we're calling a signal handler
        unsafe {
            libc::signal(signal_number, libc::SIG_DFL);
            libc::raise(signal_number);
        }
    }

    /// Writes the "caught a signal" notice and the troubleshooting link to stderr.
    ///
    /// Only uses string literals and the passed-in name, so it does not allocate.
    fn print_problem_and_links(signal_name: &str) {
        write_to_stderr("\n");
        write_to_stderr("Rerun caught a signal: ");
        write_to_stderr(signal_name);
        write_to_stderr("\n");
        write_to_stderr(
            "Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting\n\n",
        );
    }

    /// Best-effort write of `text` to stderr using the raw `write` call.
    ///
    /// `write` may accept fewer bytes than requested, so keep going until everything
    /// has been written. On error (or if no progress is made) we silently give up:
    /// there is nothing more useful we can do from inside a signal handler.
    fn write_to_stderr(text: &str) {
        let mut remaining = text.as_bytes();

        while !remaining.is_empty() {
            // SAFETY: writing to stderr is fine, even in a signal handler.
            // The pointer/length pair comes from a live slice.
            let written = unsafe {
                libc::write(
                    libc::STDERR_FILENO,
                    remaining.as_ptr().cast(),
                    remaining.len(),
                )
            };

            if written <= 0 {
                // Error or no progress: stop rather than spin.
                break;
            }

            // Clamp defensively so the slice below can never go out of bounds.
            let advance = written.unsigned_abs().min(remaining.len());
            remaining = &remaining[advance..];
        }
    }

    /// Captures a backtrace, trims the handler frames off the top and the runtime
    /// start-up frames off the bottom (when the marker strings are found), and
    /// writes the result to stderr.
    ///
    /// This allocates, which is why it is only called after the notice has been printed.
    fn print_callstack() {
        let backtrace = backtrace::Backtrace::new();
        let stack = format!("{backtrace:?}");

        // Trim it a bit:
        let mut stack = stack.as_str();
        let start_pattern = "install_signal_handler::signal_handler\n";
        if let Some(start_offset) = stack.find(start_pattern) {
            stack = &stack[start_offset + start_pattern.len()..];
        }
        if let Some(end_offset) =
            stack.find("std::sys_common::backtrace::__rust_begin_short_backtrace")
        {
            stack = &stack[..end_offset];
        }

        write_to_stderr(stack);
    }
}
1 change: 1 addition & 0 deletions crates/rerun/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#![warn(missing_docs)] // Let's keep this crate well-documented!

mod crash_handler;
mod run;

pub use run::{run, CallSource};
Expand Down
16 changes: 1 addition & 15 deletions crates/rerun/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliEr
}

async fn run_impl(call_source: CallSource, args: Args) -> anyhow::Result<()> {
install_panic_hook();
crate::crash_handler::install_crash_handlers();

let mut profiler = re_viewer::Profiler::default();
if args.profile {
Expand Down Expand Up @@ -332,17 +332,3 @@ fn parse_max_latency(max_latency: Option<&String>) -> f32 {
.unwrap_or_else(|err| panic!("Failed to parse max_latency ({max_latency:?}): {err}"))
})
}

/// Replaces the current panic hook with one that first delegates to the hook
/// that was installed before, and then prints a troubleshooting link to stderr.
fn install_panic_hook() {
    let original_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |info: &std::panic::PanicInfo<'_>| {
        // This prints the callstack etc
        original_hook(info);

        eprintln!(
            "\nTroubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}

1 comment on commit 3e4c194

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rust Benchmark

Benchmark suite Current: 3e4c194 Previous: e333e2c Ratio
datastore/insert/batch/rects/insert 540725 ns/iter (± 2227) 551948 ns/iter (± 3032) 0.98
datastore/latest_at/batch/rects/query 1755 ns/iter (± 2) 1765 ns/iter (± 3) 0.99
datastore/latest_at/missing_components/primary 353 ns/iter (± 1) 351 ns/iter (± 0) 1.01
datastore/latest_at/missing_components/secondaries 427 ns/iter (± 1) 422 ns/iter (± 1) 1.01
datastore/range/batch/rects/query 150206 ns/iter (± 860) 149580 ns/iter (± 671) 1.00
mono_points_arrow/generate_message_bundles 49609027 ns/iter (± 943220) 49584371 ns/iter (± 909727) 1.00
mono_points_arrow/generate_messages 136498653 ns/iter (± 2062401) 136243566 ns/iter (± 1307667) 1.00
mono_points_arrow/encode_log_msg 162033433 ns/iter (± 1165883) 164503790 ns/iter (± 1219695) 0.98
mono_points_arrow/encode_total 351528716 ns/iter (± 2442649) 353121555 ns/iter (± 1498272) 1.00
mono_points_arrow/decode_log_msg 184181248 ns/iter (± 938139) 183164672 ns/iter (± 884761) 1.01
mono_points_arrow/decode_message_bundles 73907825 ns/iter (± 1050583) 74478398 ns/iter (± 1070390) 0.99
mono_points_arrow/decode_total 256444627 ns/iter (± 1828361) 255247192 ns/iter (± 1733356) 1.00
batch_points_arrow/generate_message_bundles 325834 ns/iter (± 1558) 324669 ns/iter (± 1606) 1.00
batch_points_arrow/generate_messages 6079 ns/iter (± 44) 6185 ns/iter (± 18) 0.98
batch_points_arrow/encode_log_msg 358387 ns/iter (± 1510) 352961 ns/iter (± 1754) 1.02
batch_points_arrow/encode_total 708351 ns/iter (± 3107) 712605 ns/iter (± 2363) 0.99
batch_points_arrow/decode_log_msg 349352 ns/iter (± 1130) 343561 ns/iter (± 1593) 1.02
batch_points_arrow/decode_message_bundles 2011 ns/iter (± 13) 2025 ns/iter (± 14) 0.99
batch_points_arrow/decode_total 355771 ns/iter (± 1410) 355265 ns/iter (± 898) 1.00
arrow_mono_points/insert 7030803564 ns/iter (± 19043358) 7050992884 ns/iter (± 22598620) 1.00
arrow_mono_points/query 1665206 ns/iter (± 13687) 1670825 ns/iter (± 11378) 1.00
arrow_batch_points/insert 2659189 ns/iter (± 15340) 2574940 ns/iter (± 12710) 1.03
arrow_batch_points/query 16791 ns/iter (± 85) 16728 ns/iter (± 94) 1.00
tuid/Tuid::random 34 ns/iter (± 0) 34 ns/iter (± 0) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.