From 776b969d8299f92dfe5f704b3f00b665d3e09ed4 Mon Sep 17 00:00:00 2001 From: Patrick Roy Date: Wed, 3 Jul 2024 16:01:50 +0100 Subject: [PATCH] fix: do not panic if virtio device activation return `Err(...)` When the guest driver sets a virtio devices status to `DRIVER_OK`, we proceed with calling `VirtioDevice::activate`. However, our MMIO transport layer assumes that this activation cannot go wrong, and calls `.expect(...)` on the result. For most devices, this is fine, as the activate method doesn't do much besides writing to an event_fd (and I can't think of a scenario in which this could go wrong). However, our vhost-user-blk device has some non-trivial logic inside of its `activate` method, which includes communication with the vhost-user-backend via a unix socket. If this unix socket gets closed early, this causes `activate` to return an error, and thus consequently a panic in the MMIO code. The virtio spec, in Section 2.2, has the following to say [1]: > The device SHOULD set DEVICE_NEEDS_RESET when it enters an error state that a reset is needed. If DRIVER_OK is set, after it sets DEVICE_NEEDS_RESET, the device MUST send a device configuration change notification to the driver. So the spec-conform way of handling an activation error is setting the `DEVICE_NEEDS_RESET` flag in the device_status field (which is what this commit does). This will fix the panic, however it will most certainly still not result in correct device operations (e.g. a vhost-user-backend dying will still be unrecoverable). This is because Firecracker does not actually implement device reset, see also #3074. Thus, the device will simply be "dead" to the guest. But at least Firecracker won't crash anymore. [1]: https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.pdf Signed-off-by: Patrick Roy --- src/vmm/src/devices/virtio/mmio.rs | 27 ++++++++++++++++++++------- src/vmm/src/devices/virtio/mod.rs | 1 + 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/vmm/src/devices/virtio/mmio.rs b/src/vmm/src/devices/virtio/mmio.rs index c217008a7e98..efe7e31d2280 100644 --- a/src/vmm/src/devices/virtio/mmio.rs +++ b/src/vmm/src/devices/virtio/mmio.rs @@ -11,7 +11,7 @@ use std::sync::{Arc, Mutex, MutexGuard}; use utils::byte_order; -use crate::devices::virtio::device::VirtioDevice; +use crate::devices::virtio::device::{IrqType, VirtioDevice}; use crate::devices::virtio::device_status; use crate::devices::virtio::queue::Queue; use crate::logger::warn; @@ -186,10 +186,18 @@ impl MmioTransport { DRIVER_OK if self.device_status == (ACKNOWLEDGE | DRIVER | FEATURES_OK) => { self.device_status = status; let device_activated = self.locked_device().is_activated(); - if !device_activated && self.are_queues_valid() { - self.locked_device() - .activate(self.mem.clone()) - .expect("Failed to activate device"); + if !device_activated + && self.are_queues_valid() + && self.locked_device().activate(self.mem.clone()).is_err() + { + self.device_status |= DEVICE_NEEDS_RESET; + + // Section 2.1.2 of the specification states that we need to send a device + // configuration change interrupt + let _ = self + .locked_device() + .interrupt_trigger() + .trigger_irq(IrqType::Config); } } _ if (status & FAILED) != 0 => { @@ -306,7 +314,9 @@ impl MmioTransport { 0x20 => { if self.check_device_status( device_status::DRIVER, - device_status::FEATURES_OK | device_status::FAILED, + device_status::FEATURES_OK + | device_status::FAILED + | device_status::DEVICE_NEEDS_RESET, ) { self.locked_device() .ack_features_by_page(self.acked_features_select, v); @@ -339,7 +349,10 @@ impl MmioTransport { } } 0x100..=0xfff => { - if self.check_device_status(device_status::DRIVER, device_status::FAILED) { + if self.check_device_status( + device_status::DRIVER, + device_status::FAILED | device_status::DEVICE_NEEDS_RESET, + ) { self.locked_device().write_config(offset - 0x100, data) } else { warn!("can not write to device config data area before driver is ready"); diff --git a/src/vmm/src/devices/virtio/mod.rs b/src/vmm/src/devices/virtio/mod.rs index 23ab29144015..93b07c44760e 100644 --- a/src/vmm/src/devices/virtio/mod.rs +++ b/src/vmm/src/devices/virtio/mod.rs @@ -40,6 +40,7 @@ mod device_status { pub const FAILED: u32 = 128; pub const FEATURES_OK: u32 = 8; pub const DRIVER_OK: u32 = 4; + pub const DEVICE_NEEDS_RESET: u32 = 64; } /// Types taken from linux/virtio_ids.h.