diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e2779c31..506807466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Add logs flush on crash. This is not available for macOS with the `crashpad` backend. ([#1404](https://github.com/getsentry/sentry-native/pull/1404)) - Make narrow UTF-8 the canonical path encoding on Windows. ([#1413](https://github.com/getsentry/sentry-native/pull/1413)) - Re-add setting thread name for Windows transport. ([#1424](https://github.com/getsentry/sentry-native/pull/1424)) +- Fix AOT interop with managed .NET runtimes. ([#1392](https://github.com/getsentry/sentry-native/pull/1392)) **Internal**: diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c index e19bf25d3..8a8755a75 100644 --- a/src/backends/sentry_backend_inproc.c +++ b/src/backends/sentry_backend_inproc.c @@ -456,9 +456,48 @@ registers_from_uctx(const sentry_ucontext_t *uctx) return registers; } +#ifdef SENTRY_PLATFORM_LINUX +static uintptr_t +get_stack_pointer(const sentry_ucontext_t *uctx) +{ +# if defined(__i386__) + return uctx->user_context->uc_mcontext.gregs[REG_ESP]; +# elif defined(__x86_64__) + return uctx->user_context->uc_mcontext.gregs[REG_RSP]; +# elif defined(__arm__) + return uctx->user_context->uc_mcontext.arm_sp; +# elif defined(__aarch64__) + return uctx->user_context->uc_mcontext.sp; +# else + SENTRY_WARN("get_stack_pointer is not implemented for this architecture. " + "Signal chaining may not work as expected."); + return NULL; +# endif +} + +static uintptr_t +get_instruction_pointer(const sentry_ucontext_t *uctx) +{ +# if defined(__i386__) + return uctx->user_context->uc_mcontext.gregs[REG_EIP]; +# elif defined(__x86_64__) + return uctx->user_context->uc_mcontext.gregs[REG_RIP]; +# elif defined(__arm__) + return uctx->user_context->uc_mcontext.arm_pc; +# elif defined(__aarch64__) + return uctx->user_context->uc_mcontext.pc; +# else + SENTRY_WARN( + "get_instruction_pointer is not implemented for this architecture. " + "Signal chaining may not work as expected."); + return NULL; +# endif +} +#endif + static sentry_value_t -make_signal_event( - const struct signal_slot *sig_slot, const sentry_ucontext_t *uctx) +make_signal_event(const struct signal_slot *sig_slot, + const sentry_ucontext_t *uctx, sentry_handler_strategy_t strategy) { sentry_value_t event = sentry_value_new_event(); sentry_value_set_by_key( @@ -496,8 +535,10 @@ make_signal_event( "captured backtrace from ucontext with %lu frames", frame_count); // if unwinding from a ucontext didn't yield any results, try again with a // direct unwind. this is most likely the case when using `libbacktrace`, - // since that does not allow to unwind from a ucontext at all. - if (!frame_count) { + // since that does not allow to unwind from a ucontext at all. the fallback + // is skipped with the "chain at start" strategy because `libbacktrace` + // crashes, and would likely not provide helpful information anyway. + if (!frame_count && strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) { frame_count = sentry_unwind_stack(NULL, &backtrace[0], MAX_FRAMES); } SENTRY_DEBUGF("captured backtrace with %lu frames", frame_count); @@ -534,20 +575,7 @@ handle_ucontext(const sentry_ucontext_t *uctx) SENTRY_INFO("entering signal handler"); - const struct signal_slot *sig_slot = NULL; - for (int i = 0; i < SIGNAL_COUNT; ++i) { -#ifdef SENTRY_PLATFORM_UNIX - if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) { -#elif defined SENTRY_PLATFORM_WINDOWS - if (SIGNAL_DEFINITIONS[i].signum - == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) { -#else -# error Unsupported platform -#endif - sig_slot = &SIGNAL_DEFINITIONS[i]; - } - } - + sentry_handler_strategy_t strategy = SENTRY_HANDLER_STRATEGY_DEFAULT; #ifdef SENTRY_PLATFORM_UNIX // inform the sentry_sync system that we're in a signal handler. This will // make mutexes spin on a spinlock instead as it's no longer safe to use a @@ -556,42 +584,77 @@ handle_ucontext(const sentry_ucontext_t *uctx) #endif SENTRY_WITH_OPTIONS (options) { - // Flush logs in a crash-safe manner before crash handling - if (options->enable_logs) { - sentry__logs_flush_crash_safe(); - } #ifdef SENTRY_PLATFORM_LINUX // On Linux (and thus Android) CLR/Mono converts signals provoked by // AOT/JIT-generated native code into managed code exceptions. In these // cases, we shouldn't react to the signal at all and let their handler // discontinue the signal chain by invoking the runtime handler before // we process the signal. - if (sentry_options_get_handler_strategy(options) - == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) { + strategy = sentry_options_get_handler_strategy(options); + if (strategy == SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) { SENTRY_DEBUG("defer to runtime signal handler at start"); // there is a good chance that we won't return from the previous // handler and that would mean we couldn't enter this handler with // the next signal coming in if we didn't "leave" here. sentry__leave_signal_handler(); + if (!options->enable_logging_when_crashed) { + sentry__logger_enable(); + } + + uintptr_t ip = get_instruction_pointer(uctx); + uintptr_t sp = get_stack_pointer(uctx); // invoke the previous handler (typically the CLR/Mono // signal-to-managed-exception handler) invoke_signal_handler( uctx->signum, uctx->siginfo, (void *)uctx->user_context); + // If the execution returns here in AOT mode, and the instruction + // or stack pointer were changed, it means CLR/Mono converted the + // signal into a managed exception and transferred execution to a + // managed exception handler. + // https://github.com/dotnet/runtime/blob/6d96e28597e7da0d790d495ba834cc4908e442cd/src/mono/mono/mini/exceptions-arm64.c#L538 + if (ip != get_instruction_pointer(uctx) + || sp != get_stack_pointer(uctx)) { + SENTRY_DEBUG("runtime converted the signal to a managed " + "exception, we do not handle the signal"); + return; + } + // let's re-enter because it means this was an actual native crash + if (!options->enable_logging_when_crashed) { + sentry__logger_disable(); + } sentry__enter_signal_handler(); SENTRY_DEBUG( "return from runtime signal handler, we handle the signal"); } #endif + const struct signal_slot *sig_slot = NULL; + for (int i = 0; i < SIGNAL_COUNT; ++i) { +#ifdef SENTRY_PLATFORM_UNIX + if (SIGNAL_DEFINITIONS[i].signum == uctx->signum) { +#elif defined SENTRY_PLATFORM_WINDOWS + if (SIGNAL_DEFINITIONS[i].signum + == uctx->exception_ptrs.ExceptionRecord->ExceptionCode) { +#else +# error Unsupported platform +#endif + sig_slot = &SIGNAL_DEFINITIONS[i]; + } + } + #ifdef SENTRY_PLATFORM_UNIX // use a signal-safe allocator before we tear down. sentry__page_allocator_enable(); #endif + // Flush logs in a crash-safe manner before crash handling + if (options->enable_logs) { + sentry__logs_flush_crash_safe(); + } - sentry_value_t event = make_signal_event(sig_slot, uctx); + sentry_value_t event = make_signal_event(sig_slot, uctx, strategy); bool should_handle = true; sentry__write_crash_marker(options); @@ -647,8 +710,10 @@ handle_ucontext(const sentry_ucontext_t *uctx) // forward as we're not restoring the page allocator. reset_signal_handlers(); sentry__leave_signal_handler(); - invoke_signal_handler( - uctx->signum, uctx->siginfo, (void *)uctx->user_context); + if (strategy != SENTRY_HANDLER_STRATEGY_CHAIN_AT_START) { + invoke_signal_handler( + uctx->signum, uctx->siginfo, (void *)uctx->user_context); + } #endif } diff --git a/tests/fixtures/dotnet_signal/Program.cs b/tests/fixtures/dotnet_signal/Program.cs index 951adaaf7..4e6217ac3 100644 --- a/tests/fixtures/dotnet_signal/Program.cs +++ b/tests/fixtures/dotnet_signal/Program.cs @@ -45,7 +45,7 @@ static void Main(string[] args) { native_crash(); } - else + else if (args.Contains("managed-exception")) { try { @@ -55,10 +55,13 @@ static void Main(string[] args) } catch (NullReferenceException exception) { - Console.WriteLine("dereference another NULL object from managed code"); - var s = default(string); - var c = s.Length; } } + else if (args.Contains("unhandled-managed-exception")) + { + Console.WriteLine("dereference a NULL object from managed code (unhandled)"); + var s = default(string); + var c = s.Length; + } } } \ No newline at end of file diff --git a/tests/test_dotnet_signals.py b/tests/test_dotnet_signals.py index 8048cc600..9d7fe07c9 100644 --- a/tests/test_dotnet_signals.py +++ b/tests/test_dotnet_signals.py @@ -49,7 +49,11 @@ def run_dotnet(tmp_path, args): def run_dotnet_managed_exception(tmp_path): - return run_dotnet(tmp_path, ["dotnet", "run"]) + return run_dotnet(tmp_path, ["dotnet", "run", "managed-exception"]) + + +def run_dotnet_unhandled_managed_exception(tmp_path): + return run_dotnet(tmp_path, ["dotnet", "run", "unhandled-managed-exception"]) def run_dotnet_native_crash(tmp_path): @@ -84,9 +88,25 @@ def test_dotnet_signals_inproc(cmake): ) # this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception. + # raising a signal that CLR converts to a managed exception, which is then handled by the managed code and + # not leaked out to the native code so no crash is registered. dotnet_run = run_dotnet_managed_exception(tmp_path) dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate() + # the program handles the `NullReferenceException`, so the Native SDK won't register a crash. + assert dotnet_run.returncode == 0 + assert not ( + "NullReferenceException" in dotnet_run_stderr + ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}" + database_path = project_fixture_path / ".sentry-native" + assert database_path.exists(), "No database-path exists" + assert not (database_path / "last_crash").exists(), "A crash was registered" + assert_empty_run_dir(database_path) + + # this runs the dotnet program with the Native SDK and chain-at-start, when managed code raises a signal that CLR convert to an exception. + dotnet_run = run_dotnet_unhandled_managed_exception(tmp_path) + dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate() + # the program will fail with a `NullReferenceException`, but the Native SDK won't register a crash. assert dotnet_run.returncode != 0 assert ( @@ -112,3 +132,120 @@ def test_dotnet_signals_inproc(cmake): shutil.rmtree(project_fixture_path / ".sentry-native", ignore_errors=True) shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) shutil.rmtree(project_fixture_path / "obj", ignore_errors=True) + + +def run_aot(tmp_path, args=None): + if args is None: + args = [] + env = os.environ.copy() + env["LD_LIBRARY_PATH"] = str(tmp_path) + ":" + env.get("LD_LIBRARY_PATH", "") + return subprocess.Popen( + [str(tmp_path / "bin/test_dotnet")] + args, + cwd=tmp_path, + env=env, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + +def run_aot_managed_exception(tmp_path): + return run_aot(tmp_path, ["managed-exception"]) + + +def run_aot_unhandled_managed_exception(tmp_path): + return run_aot(tmp_path, ["unhandled-managed-exception"]) + + +def run_aot_native_crash(tmp_path): + return run_aot(tmp_path, ["native-crash"]) + + +@pytest.mark.skipif( + sys.platform != "linux" or is_x86 or is_asan or is_tsan, + reason="dotnet AOT signal handling is currently only supported on 64-bit Linux without sanitizers", +) +def test_aot_signals_inproc(cmake): + try: + # build native client library with inproc and the example for crash dumping + tmp_path = cmake( + ["sentry"], + {"SENTRY_BACKEND": "inproc", "SENTRY_TRANSPORT": "none"}, + ) + + # build the crashing native library + subprocess.run( + [ + "gcc", + "-Wall", + "-Wextra", + "-fPIC", + "-shared", + str(project_fixture_path / "crash.c"), + "-o", + str(tmp_path / "libcrash.so"), + ], + check=True, + ) + + # AOT-compile the dotnet program + subprocess.run( + [ + "dotnet", + "publish", + "-p:PublishAot=true", + "-p:Configuration=Release", + "-o", + str(tmp_path / "bin"), + ], + cwd=project_fixture_path, + check=True, + ) + + # this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException` + # raising a signal that CLR converts to a managed exception, which is then handled by the managed code and + # not leaked out to the native code so no crash is registered. + dotnet_run = run_aot_managed_exception(tmp_path) + dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate() + + # the program handles the `NullReferenceException`, so the Native SDK won't register a crash. + assert dotnet_run.returncode == 0 + assert not ( + "NullReferenceException" in dotnet_run_stderr + ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}" + database_path = tmp_path / ".sentry-native" + assert database_path.exists(), "No database-path exists" + assert not (database_path / "last_crash").exists(), "A crash was registered" + assert_empty_run_dir(database_path) + + # this runs the dotnet program in AOT mode with the Native SDK and chain-at-start, and triggers a `NullReferenceException` + # raising a signal that CLR converts to a managed exception, which is then not handled by the managed code but + # leaked out to the native code so a crash is registered. + dotnet_run = run_aot_unhandled_managed_exception(tmp_path) + dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate() + + # the program will fail with a `NullReferenceException`, so the Native SDK will register a crash. + assert dotnet_run.returncode != 0 + assert ( + "NullReferenceException" in dotnet_run_stderr + ), f"Managed exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}" + database_path = tmp_path / ".sentry-native" + assert database_path.exists(), "No database-path exists" + assert (database_path / "last_crash").exists() + assert_run_dir_with_envelope(database_path) + + # this runs the dotnet program with the Native SDK and chain-at-start, when an actual native crash raises a signal + dotnet_run = run_aot_native_crash(tmp_path) + dotnet_run_stdout, dotnet_run_stderr = dotnet_run.communicate() + + # the program will fail with a SIGSEGV, that has been processed by the Native SDK which produced a crash envelope + assert dotnet_run.returncode != 0 + assert ( + "crash has been captured" in dotnet_run_stderr + ), f"Native exception run failed.\nstdout:\n{dotnet_run_stdout}\nstderr:\n{dotnet_run_stderr}" + assert (database_path / "last_crash").exists() + assert_run_dir_with_envelope(database_path) + finally: + shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True) + shutil.rmtree(project_fixture_path / "bin", ignore_errors=True) + shutil.rmtree(project_fixture_path / "obj", ignore_errors=True)