From bf00b579912a6604f0d318f3df17b1e149b33b3f Mon Sep 17 00:00:00 2001 From: Josh Seba Date: Thu, 12 Oct 2023 09:16:46 -0700 Subject: [PATCH] Add option to abort VMM when parent process dies If Firecracker is being monitored by a parent process that unexpectedly terminates, it will be abandoned up the process tree, likely to a process that doesn't know what do with it (such as init). This becomes even trickier if the process was running in a mount namespace that was controlled by the parent process, as the API socket is now inaccessible. If the parent process was also keeping handles on other resources used by the Firecracker VMM, these could be re-used by new processes and cause conflicts with the now orphaned Firecracker. This adds a flag to set the parent death signal (SIGUSR2 in this instance) that the process will receive when its parent process exits before the VMM does. Receipt of this signal will cause the VMM to abruptly abort, much like the SIGILL signal. While a graceful shutdown would be preferable, since the parent process may have been controlling outside resources for Firecracker (disks, networking, etc.), it's indeterminate whether or not it is safe to continue running the VM. Signed-off-by: Josh Seba --- src/firecracker/src/main.rs | 8 +++++++- src/vmm/src/lib.rs | 2 ++ src/vmm/src/logger/metrics.rs | 3 +++ src/vmm/src/signal_handler.rs | 38 ++++++++++++++++++++++++++++++++--- 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/firecracker/src/main.rs b/src/firecracker/src/main.rs index ef1421e92c3..4630e2a9be1 100644 --- a/src/firecracker/src/main.rs +++ b/src/firecracker/src/main.rs @@ -248,6 +248,11 @@ fn main_exec() -> Result<(), MainError> { Argument::new("mmds-size-limit") .takes_value(true) .help("Mmds data store limit, in bytes."), + ) + .arg( + Argument::new("no-outlive-parent") + .takes_value(false) + .help("Whether to abort the VMM when the parent process dies."), ); arg_parser.parse_from_cmdline()?; @@ -297,7 +302,8 @@ fn main_exec() -> Result<(), MainError> { .map_err(MainError::LoggerInitialization)?; info!("Running Firecracker v{FIRECRACKER_VERSION}"); - register_signal_handlers().map_err(MainError::RegisterSignalHandlers)?; + let register_pdeathsig = arguments.flag_present("no-outlive-parent"); + register_signal_handlers(register_pdeathsig).map_err(MainError::RegisterSignalHandlers)?; #[cfg(target_arch = "aarch64")] enable_ssbd_mitigation(); diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 36e30003706..248430e4fa2 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -178,6 +178,8 @@ pub enum FcExitCode { SIGHUP = 156, /// Firecracker was shut down after intercepting `SIGILL`. SIGILL = 157, + /// Firecracker was shut down after intercepting `SIGUSR2` (parent process has died). + SIGUSR2 = 158, /// Bad configuration for microvm's resources, when using a single json. BadConfiguration = 152, /// Command line arguments parsing error. diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index f333fe04547..28ed39b7d9a 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -814,6 +814,8 @@ pub struct SignalMetrics { pub sighup: SharedStoreMetric, /// Number of times that SIGILL was handled. pub sigill: SharedStoreMetric, + /// Number of times that SIGUSR2 was handled. + pub sigusr2: SharedStoreMetric, } impl SignalMetrics { /// Const default construction. @@ -826,6 +828,7 @@ impl SignalMetrics { sigpipe: SharedIncMetric::new(), sighup: SharedStoreMetric::new(), sigill: SharedStoreMetric::new(), + sigusr2: SharedStoreMetric::new(), } } } diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 0f144365bb7..a6bbd7900f0 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 use libc::{ - c_int, c_void, siginfo_t, SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ, + c_int, c_void, prctl, siginfo_t, PR_SET_PDEATHSIG, SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, + SIGSYS, SIGUSR2, SIGXCPU, SIGXFSZ, }; use log::error; use utils::signal::register_signal_handler; @@ -74,6 +75,13 @@ fn log_sigsys_err(si_code: c_int, info: *mut siginfo_t) { ); } +fn log_parent_death_signal(si_code: c_int, _info: *mut siginfo_t) { + error!( + "Shutting down VM after intercepting the parent death signal ({}).", + si_code + ); +} + fn empty_fn(_si_code: c_int, _info: *mut siginfo_t) {} generate_handler!( @@ -123,6 +131,7 @@ generate_handler!( METRICS.signals.sighup, empty_fn ); + generate_handler!( sigill_handler, SIGILL, @@ -131,6 +140,14 @@ generate_handler!( empty_fn ); +generate_handler!( + sigusr2_handler, + SIGUSR2, + SIGUSR2, + METRICS.signals.sigusr2, + log_parent_death_signal +); + #[inline(always)] extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { // Just record the metric and allow the process to continue, the EPIPE error needs @@ -155,7 +172,7 @@ extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_ /// /// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS` /// `SIGXFSZ` `SIGXCPU` `SIGPIPE` `SIGHUP` and `SIGILL`. -pub fn register_signal_handlers() -> utils::errno::Result<()> { +pub fn register_signal_handlers(register_pdeathsig: bool) -> utils::errno::Result<()> { // Call to unsafe register_signal_handler which is considered unsafe because it will // register a signal handler which will be called in the current thread and will interrupt // whatever work is done on the current thread, so we have to keep in mind that the registered @@ -168,6 +185,14 @@ pub fn register_signal_handlers() -> utils::errno::Result<()> { register_signal_handler(SIGPIPE, sigpipe_handler)?; register_signal_handler(SIGHUP, sighup_handler)?; register_signal_handler(SIGILL, sigill_handler)?; + + if register_pdeathsig { + register_signal_handler(SIGUSR2, sigusr2_handler)?; + unsafe { + let rc = libc::prctl(PR_SET_PDEATHSIG, SIGUSR2); + assert_eq!(rc, 0); + } + } Ok(()) } @@ -184,7 +209,7 @@ mod tests { #[test] fn test_signal_handler() { let child = thread::spawn(move || { - assert!(register_signal_handlers().is_ok()); + assert!(register_signal_handlers(true).is_ok()); let filter = make_test_seccomp_bpf_filter(); @@ -235,6 +260,12 @@ mod tests { unsafe { syscall(libc::SYS_kill, process::id(), SIGILL); } + + // Call SIGUSR2 signal handler. + assert_eq!(METRICS.signals.sigusr2.fetch(), 0); + unsafe { + syscall(libc::SYS_kill, process::id(), SIGUSR2); + } }); assert!(child.join().is_ok()); @@ -246,6 +277,7 @@ mod tests { assert!(METRICS.signals.sigpipe.count() >= 1); assert!(METRICS.signals.sighup.fetch() >= 1); assert!(METRICS.signals.sigill.fetch() >= 1); + assert!(METRICS.signals.sigusr2.fetch() >= 1); } fn make_test_seccomp_bpf_filter() -> Vec {