diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e4d7ff55cf..4e0b93e6c04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Added devtool build `--ssh-keys` flag to support fetching from private git repositories. +- Added option to configure block device flush. ### Changed diff --git a/docs/api_requests/block-caching.md b/docs/api_requests/block-caching.md new file mode 100644 index 00000000000..ebbbfea05f4 --- /dev/null +++ b/docs/api_requests/block-caching.md @@ -0,0 +1,70 @@ +# Block device caching strategies + +Firecracker offers the possiblity of choosing the block device caching +strategy. Caching strategy affects the path data written from inside the +microVM takes to the host persistent storage. + +## How it works + +When installing a block device through a PUT /drives API call, users can choose +the caching strategy by inserting a `cache_type` field in the JSON body of the +request. The available cache types are: + +- `Unsafe` +- `Writeback` + +### Unsafe mode (default) + +When configuring the block caching strategy to `Unsafe`, the device will +advertise the VirtIO `flush` feature to the guest driver. If negotiated when +activating the device, the guest driver will be able to send flush requests +to the device, but the device will just acknowledge the request without +actually performing any flushing on the host side. The data which was not +yet committed to disk will continue to reside in the host page cache. + +### Writeback mode + +When configuring the block caching strategy to `Writeback`, the device will +advertise the VirtIO `flush` feature to the guest driver. If negotiated when +activating the device, the guest driver will be able to send flush requests +to the device. When the device executes a flush request, it will perform an +`fsync` syscall on the backing block file, committing all data in the host +page cache to disk. + +## Supported use cases + +The caching strategy should be used in order to make a trade-off: + +- `Unsafe` + - enhances performance as fewer syscalls and IO operations are performed when + running workloads + - sacrifices data integrity in situations where the host simply loses the + contents of the page cache without committing them to the backing storage + (such as a power outage) + - recommended for use cases with ephemeral storage, such as serverless + environments +- `Writeback` + - ensures that once a flush request was acknowledged by the host, the data + is committed to the backing storage + - sacrifices performance, from boot time increases to greater + emulation-related latencies when running workloads + - recommended for use cases with low power environments, such as embedded + environments + +## How to configure it + +Example sequence that configures a block device with a caching strategy: + +```bash +curl --unix-socket ${socket} -i \ + -X PUT "http://localhost/drives/dummy" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d "{ + \"drive_id\": \"dummy\", + \"path_on_host\": \"${drive_path}\", + \"is_root_device\": false, + \"is_read_only\": false, + \"cache_type\": \"Writeback\" + }" +``` diff --git a/src/api_server/src/parsed_request.rs b/src/api_server/src/parsed_request.rs index 47e4bbd2794..e52ad6e4197 100644 --- a/src/api_server/src/parsed_request.rs +++ b/src/api_server/src/parsed_request.rs @@ -643,6 +643,7 @@ pub(crate) mod tests { \"is_root_device\": true, \ \"partuuid\": \"string\", \ \"is_read_only\": true, \ + \"cache_type\": \"Unsafe\", \ \"rate_limiter\": { \ \"bandwidth\": { \ \"size\": 0, \ diff --git a/src/api_server/src/request/drive.rs b/src/api_server/src/request/drive.rs index 1a571ddbeb5..22c2696103d 100644 --- a/src/api_server/src/request/drive.rs +++ b/src/api_server/src/request/drive.rs @@ -216,20 +216,33 @@ mod tests { assert!(parse_put_drive(&Body::new("invalid_payload"), None).is_err()); assert!(parse_put_drive(&Body::new("invalid_payload"), Some(&"id")).is_err()); - // PATCH with invalid fields. + // PUT with invalid fields. let body = r#"{ "drive_id": "bar", "is_read_only": false }"#; assert!(parse_put_drive(&Body::new(body), Some(&"2")).is_err()); - // PATCH with invalid types on fields. Adding a drive_id as number instead of string. + // PUT with missing all optional fields. + let body = r#"{ + "drive_id": "1000", + "path_on_host": "dummy", + "is_root_device": true, + "is_read_only": true + }"#; + assert!(parse_put_drive(&Body::new(body), Some(&"1000")).is_ok()); + + // PUT with invalid types on fields. Adding a drive_id as number instead of string. + assert!(parse_put_drive(&Body::new(body), Some(&"foo")).is_err()); + + // PUT with the complete configuration. let body = r#"{ "drive_id": "1000", "path_on_host": "dummy", "is_root_device": true, "partuuid": "string", "is_read_only": true, + "cache_type": "Unsafe", "rate_limiter": { "bandwidth": { "size": 0, @@ -244,7 +257,5 @@ mod tests { } }"#; assert!(parse_put_drive(&Body::new(body), Some(&"1000")).is_ok()); - - assert!(parse_put_drive(&Body::new(body), Some(&"foo")).is_err()); } } diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index 0b1ef5b54e4..5b8e512d326 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -753,6 +753,11 @@ definitions: properties: drive_id: type: string + cache_type: + type: string + description: + Represents the caching strategy for the block device. + default: "Unsafe" is_read_only: type: boolean is_root_device: diff --git a/src/devices/src/virtio/block/device.rs b/src/devices/src/virtio/block/device.rs index b15e89db188..76f48d8502d 100644 --- a/src/devices/src/virtio/block/device.rs +++ b/src/devices/src/virtio/block/device.rs @@ -30,8 +30,29 @@ use super::{ use crate::virtio::VIRTIO_MMIO_INT_CONFIG; use crate::Error as DeviceError; +use serde::{Deserialize, Serialize}; + +/// Configuration options for disk caching. +#[derive(Clone, Copy, Debug, Deserialize, PartialEq, Serialize)] +pub enum CacheType { + /// Flushing mechanic will be advertised to the guest driver, but + /// the operation will be a noop. + Unsafe, + /// Flushing mechanic will be advertised to the guest driver and + /// flush requests coming from the guest will be performed using + /// `fsync`. + Writeback, +} + +impl Default for CacheType { + fn default() -> CacheType { + CacheType::Unsafe + } +} + /// Helper object for setting up all `Block` fields derived from its backing file. pub(crate) struct DiskProperties { + cache_type: CacheType, file_path: String, file: File, nsectors: u64, @@ -39,7 +60,11 @@ pub(crate) struct DiskProperties { } impl DiskProperties { - pub fn new(disk_image_path: String, is_disk_read_only: bool) -> io::Result { + pub fn new( + disk_image_path: String, + is_disk_read_only: bool, + cache_type: CacheType, + ) -> io::Result { let mut disk_image = OpenOptions::new() .read(true) .write(!is_disk_read_only) @@ -57,6 +82,7 @@ impl DiskProperties { } Ok(Self { + cache_type, nsectors: disk_size >> SECTOR_SHIFT, image_id: Self::build_disk_image_id(&disk_image), file_path: disk_image_path, @@ -121,6 +147,31 @@ impl DiskProperties { } config } + + pub fn cache_type(&self) -> CacheType { + self.cache_type + } +} + +impl Drop for DiskProperties { + fn drop(&mut self) { + match self.cache_type { + CacheType::Writeback => { + // flush() first to force any cached data out. + if self.file.flush().is_err() { + error!("Failed to flush block data on drop."); + } + // Sync data out to physical media on host. + if self.file.sync_all().is_err() { + error!("Failed to sync block data on drop.") + } + METRICS.block.flush_count.inc(); + } + CacheType::Unsafe => { + // This is a noop. + } + }; + } } /// Virtio device for exposing block level read/write operations on a host file. @@ -155,12 +206,13 @@ impl Block { pub fn new( id: String, partuuid: Option, + cache_type: CacheType, disk_image_path: String, is_disk_read_only: bool, is_disk_root: bool, rate_limiter: RateLimiter, ) -> io::Result { - let disk_properties = DiskProperties::new(disk_image_path, is_disk_read_only)?; + let disk_properties = DiskProperties::new(disk_image_path, is_disk_read_only, cache_type)?; let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_BLK_F_FLUSH); @@ -343,7 +395,8 @@ impl Block { /// Update the backing file and the config space of the block device. pub fn update_disk_image(&mut self, disk_image_path: String) -> io::Result<()> { - let disk_properties = DiskProperties::new(disk_image_path, self.is_read_only())?; + let disk_properties = + DiskProperties::new(disk_image_path, self.is_read_only(), self.cache_type())?; self.disk = disk_properties; self.config_space = self.disk.virtio_block_config_space(); @@ -380,6 +433,10 @@ impl Block { pub fn is_root_device(&self) -> bool { self.root_device } + + pub fn cache_type(&self) -> CacheType { + self.disk.cache_type() + } } impl VirtioDevice for Block { @@ -491,8 +548,12 @@ pub(crate) mod tests { let size = SECTOR_SIZE * num_sectors; f.as_file().set_len(size).unwrap(); - let disk_properties = - DiskProperties::new(String::from(f.as_path().to_str().unwrap()), true).unwrap(); + let disk_properties = DiskProperties::new( + String::from(f.as_path().to_str().unwrap()), + true, + CacheType::Unsafe, + ) + .unwrap(); assert_eq!(size, SECTOR_SIZE * num_sectors); assert_eq!(disk_properties.nsectors, num_sectors); @@ -504,7 +565,9 @@ pub(crate) mod tests { // Testing `backing_file.virtio_block_disk_image_id()` implies // duplicating that logic in tests, so skipping it. - assert!(DiskProperties::new("invalid-disk-path".to_string(), true).is_err()); + assert!( + DiskProperties::new("invalid-disk-path".to_string(), true, CacheType::Unsafe).is_err() + ); } #[test] diff --git a/src/devices/src/virtio/block/mod.rs b/src/devices/src/virtio/block/mod.rs index 0113c087c83..affe8995678 100644 --- a/src/devices/src/virtio/block/mod.rs +++ b/src/devices/src/virtio/block/mod.rs @@ -7,7 +7,7 @@ pub mod persist; pub mod request; pub mod test_utils; -pub use self::device::Block; +pub use self::device::{Block, CacheType}; pub use self::event_handler::*; pub use self::request::*; diff --git a/src/devices/src/virtio/block/persist.rs b/src/devices/src/virtio/block/persist.rs index fcec620a052..26852977e27 100644 --- a/src/devices/src/virtio/block/persist.rs +++ b/src/devices/src/virtio/block/persist.rs @@ -7,9 +7,10 @@ use std::io; use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use logger::warn; use rate_limiter::{persist::RateLimiterState, RateLimiter}; use snapshot::Persist; -use versionize::{VersionMap, Versionize, VersionizeResult}; +use versionize::{VersionMap, Versionize, VersionizeError, VersionizeResult}; use versionize_derive::Versionize; use virtio_gen::virtio_blk::VIRTIO_BLK_F_RO; use vm_memory::GuestMemoryMmap; @@ -19,17 +20,65 @@ use super::*; use crate::virtio::persist::VirtioDeviceState; use crate::virtio::{DeviceState, TYPE_BLOCK}; +#[derive(Clone, Copy, Debug, Versionize, PartialEq)] +// NOTICE: Any changes to this structure require a snapshot version bump. +pub enum CacheTypeState { + Unsafe, + Writeback, +} + +impl From for CacheTypeState { + fn from(cache_type: CacheType) -> Self { + match cache_type { + CacheType::Unsafe => CacheTypeState::Unsafe, + CacheType::Writeback => CacheTypeState::Writeback, + } + } +} + +impl Into for CacheTypeState { + fn into(self) -> CacheType { + match self { + CacheTypeState::Unsafe => CacheType::Unsafe, + CacheTypeState::Writeback => CacheType::Writeback, + } + } +} + #[derive(Clone, Versionize)] // NOTICE: Any changes to this structure require a snapshot version bump. pub struct BlockState { id: String, partuuid: Option, + #[version( + start = 2, + ser_fn = "block_cache_type_ser", + default_fn = "default_cache_type_flush" + )] + cache_type: CacheTypeState, root_device: bool, disk_path: String, virtio_state: VirtioDeviceState, rate_limiter_state: RateLimiterState, } +impl BlockState { + fn block_cache_type_ser(&mut self, target_version: u16) -> VersionizeResult<()> { + if target_version < 3 && self.cache_type != CacheTypeState::Unsafe { + warn!( + "Target version does not implement the current cache type. \ + Defaulting to \"unsafe\" mode." + ); + } + + Ok(()) + } + + fn default_cache_type_flush(_source_version: u16) -> CacheTypeState { + CacheTypeState::Unsafe + } +} + pub struct BlockConstructorArgs { pub mem: GuestMemoryMmap, } @@ -43,6 +92,7 @@ impl Persist<'_> for Block { BlockState { id: self.id.clone(), partuuid: self.partuuid.clone(), + cache_type: CacheTypeState::from(self.cache_type()), root_device: self.root_device, disk_path: self.disk.file_path().clone(), virtio_state: VirtioDeviceState::from_device(self), @@ -60,6 +110,7 @@ impl Persist<'_> for Block { let mut block = Block::new( state.id.clone(), state.partuuid.clone(), + state.cache_type.into(), state.disk_path.clone(), is_disk_read_only, state.root_device, @@ -91,6 +142,67 @@ mod tests { use crate::virtio::test_utils::default_mem; use std::sync::atomic::Ordering; + #[test] + fn test_cache_type_state_from() { + assert_eq!( + CacheTypeState::Unsafe, + CacheTypeState::from(CacheType::Unsafe) + ); + assert_eq!( + CacheTypeState::Writeback, + CacheTypeState::from(CacheType::Writeback) + ); + } + + #[test] + fn test_cache_type_state_into() { + assert_eq!(CacheType::Unsafe, CacheTypeState::Unsafe.into()); + assert_eq!(CacheType::Writeback, CacheTypeState::Writeback.into()); + } + + #[test] + fn test_default_cache_type_flush() { + assert_eq!( + BlockState::default_cache_type_flush(2), + CacheTypeState::Unsafe + ); + assert_eq!( + BlockState::default_cache_type_flush(3), + CacheTypeState::Unsafe + ); + } + + #[test] + fn test_cache_semantic_ser() { + // We create the backing file here so that it exists for the whole lifetime of the test. + let f = TempFile::new().unwrap(); + f.as_file().set_len(0x1000).unwrap(); + + let id = "test".to_string(); + let block = Block::new( + id, + None, + CacheType::Writeback, + f.as_path().to_str().unwrap().to_string(), + false, + false, + RateLimiter::default(), + ) + .unwrap(); + + // Save the block device. + let mut mem = vec![0; 4096]; + let version_map = VersionMap::new(); + + assert!(::save(&block) + .serialize(&mut mem.as_mut_slice(), &version_map, 2) + .is_ok()); + + assert!(::save(&block) + .serialize(&mut mem.as_mut_slice(), &version_map, 3) + .is_ok()); + } + #[test] fn test_persistence() { // We create the backing file here so that it exists for the whole lifetime of the test. @@ -101,6 +213,7 @@ mod tests { let block = Block::new( id, None, + CacheType::Unsafe, f.as_path().to_str().unwrap().to_string(), false, false, diff --git a/src/devices/src/virtio/block/request.rs b/src/devices/src/virtio/block/request.rs index 85c18f5c49e..4613b1e2d77 100644 --- a/src/devices/src/virtio/block/request.rs +++ b/src/devices/src/virtio/block/request.rs @@ -14,7 +14,7 @@ use virtio_gen::virtio_blk::*; use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemoryError, GuestMemoryMmap}; use super::super::DescriptorChain; -use super::device::DiskProperties; +use super::device::{CacheType, DiskProperties}; use super::{Error, SECTOR_SHIFT, SECTOR_SIZE}; #[derive(Debug)] @@ -23,6 +23,7 @@ pub enum ExecuteError { Flush(io::Error), Read(GuestMemoryError), Seek(io::Error), + SyncAll(io::Error), Write(GuestMemoryError), Unsupported(u32), } @@ -34,6 +35,7 @@ impl ExecuteError { ExecuteError::Flush(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Read(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Seek(_) => VIRTIO_BLK_S_IOERR, + ExecuteError::SyncAll(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Write(_) => VIRTIO_BLK_S_IOERR, ExecuteError::Unsupported(_) => VIRTIO_BLK_S_UNSUPP, } @@ -192,6 +194,7 @@ impl Request { return Err(ExecuteError::BadRequest(Error::InvalidOffset)); } + let cache_type = disk.cache_type(); let diskfile = disk.file_mut(); diskfile .seek(SeekFrom::Start(self.sector << SECTOR_SHIFT)) @@ -214,13 +217,21 @@ impl Request { 0 }) .map_err(ExecuteError::Write), - RequestType::Flush => diskfile - .flush() - .map(|_| { - METRICS.block.flush_count.inc(); - 0 - }) - .map_err(ExecuteError::Flush), + RequestType::Flush => { + match cache_type { + CacheType::Writeback => { + // flush() first to force any cached data out. + diskfile.flush().map_err(ExecuteError::Flush)?; + // Sync data out to physical media on host. + diskfile.sync_all().map_err(ExecuteError::SyncAll)?; + METRICS.block.flush_count.inc(); + } + CacheType::Unsafe => { + // This is a noop. + } + }; + Ok(0) + } RequestType::GetDeviceID => { let disk_id = disk.image_id(); if (self.data_len as usize) < disk_id.len() { diff --git a/src/devices/src/virtio/block/test_utils.rs b/src/devices/src/virtio/block/test_utils.rs index e79bf30b388..5b1f86fbfea 100644 --- a/src/devices/src/virtio/block/test_utils.rs +++ b/src/devices/src/virtio/block/test_utils.rs @@ -3,7 +3,7 @@ use std::os::unix::io::AsRawFd; -use crate::virtio::{Block, Queue}; +use crate::virtio::{Block, CacheType, Queue}; use polly::event_manager::{EventManager, Subscriber}; use rate_limiter::RateLimiter; use utils::epoll::{EpollEvent, EventSet}; @@ -25,7 +25,16 @@ pub fn default_block_with_path(path: String) -> Block { let id = "test".to_string(); // The default block device is read-write and non-root. - Block::new(id, None, path, false, false, rate_limiter).unwrap() + Block::new( + id, + None, + CacheType::Unsafe, + path, + false, + false, + rate_limiter, + ) + .unwrap() } pub fn invoke_handler_for_queue_event(b: &mut Block) { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index e60bdd3b2e9..71f7c3d90ad 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -835,7 +835,7 @@ pub mod tests { use super::*; use crate::vmm_config::balloon::{BalloonBuilder, BalloonDeviceConfig, BALLOON_DEV_ID}; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; - use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig}; + use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig, CacheType}; use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig}; use crate::vmm_config::vsock::tests::default_config; use crate::vmm_config::vsock::{VsockBuilder, VsockDeviceConfig}; @@ -850,6 +850,7 @@ pub mod tests { is_root_device: bool, partuuid: Option, is_read_only: bool, + cache_type: CacheType, } impl CustomBlockConfig { @@ -858,12 +859,14 @@ pub mod tests { is_root_device: bool, partuuid: Option, is_read_only: bool, + cache_type: CacheType, ) -> Self { CustomBlockConfig { drive_id, is_root_device, partuuid, is_read_only, + cache_type, } } } @@ -946,6 +949,7 @@ pub mod tests { is_root_device: custom_block_cfg.is_root_device, partuuid: custom_block_cfg.partuuid.clone(), is_read_only: custom_block_cfg.is_read_only, + cache_type: custom_block_cfg.cache_type, rate_limiter: None, }; block_dev_configs.insert(block_device_config).unwrap(); @@ -1137,7 +1141,13 @@ pub mod tests { // Use case 1: root block device is not specified through PARTUUID. { let drive_id = String::from("root"); - let block_configs = vec![CustomBlockConfig::new(drive_id.clone(), true, None, true)]; + let block_configs = vec![CustomBlockConfig::new( + drive_id.clone(), + true, + None, + true, + CacheType::Unsafe, + )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); @@ -1156,6 +1166,7 @@ pub mod tests { true, Some("0eaa91a0-01".to_string()), false, + CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); @@ -1175,6 +1186,7 @@ pub mod tests { false, Some("0eaa91a0-01".to_string()), false, + CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); @@ -1195,9 +1207,22 @@ pub mod tests { true, Some("0eaa91a0-01".to_string()), false, + CacheType::Unsafe, + ), + CustomBlockConfig::new( + String::from("secondary"), + false, + None, + true, + CacheType::Unsafe, + ), + CustomBlockConfig::new( + String::from("third"), + false, + None, + false, + CacheType::Unsafe, ), - CustomBlockConfig::new(String::from("secondary"), false, None, true), - CustomBlockConfig::new(String::from("third"), false, None, false), ]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); @@ -1227,7 +1252,13 @@ pub mod tests { // Use case 5: root block device is rw. { let drive_id = String::from("root"); - let block_configs = vec![CustomBlockConfig::new(drive_id.clone(), true, None, false)]; + let block_configs = vec![CustomBlockConfig::new( + drive_id.clone(), + true, + None, + false, + CacheType::Unsafe, + )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); @@ -1246,6 +1277,7 @@ pub mod tests { true, Some("0eaa91a0-01".to_string()), true, + CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); @@ -1256,6 +1288,26 @@ pub mod tests { .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) .is_some()); } + + // Use case 7: root block device is rw with flush enabled + { + let drive_id = String::from("root"); + let block_configs = vec![CustomBlockConfig::new( + drive_id.clone(), + true, + None, + false, + CacheType::Writeback, + )]; + let mut vmm = default_vmm(); + let mut cmdline = default_kernel_cmdline(); + insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); + assert!(cmdline.as_str().contains("root=/dev/vda rw")); + assert!(vmm + .mmio_device_manager + .get_device(DeviceType::Virtio(TYPE_BLOCK), drive_id.as_str()) + .is_some()); + } } #[test] diff --git a/src/vmm/src/default_syscalls/filters.rs b/src/vmm/src/default_syscalls/filters.rs index a335ee473f0..0dbba3f8e7d 100644 --- a/src/vmm/src/default_syscalls/filters.rs +++ b/src/vmm/src/default_syscalls/filters.rs @@ -158,6 +158,7 @@ pub fn default_filter() -> Result { libc::SYS_timerfd_settime, or![and![Cond::new(1, ArgLen::DWORD, Eq, 0u64)?],], ), + allow_syscall(libc::SYS_fsync), allow_syscall(libc::SYS_write), ] .into_iter() diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 3cd47d3a2e2..00206ba6d59 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -401,6 +401,7 @@ mod tests { use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::vsock::VsockDeviceConfig; + use devices::virtio::block::CacheType; use polly::event_manager::EventManager; use utils::tempfile::TempFile; @@ -547,7 +548,13 @@ mod tests { insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg); // Add a block device. let drive_id = String::from("root"); - let block_configs = vec![CustomBlockConfig::new(drive_id, true, None, true)]; + let block_configs = vec![CustomBlockConfig::new( + drive_id, + true, + None, + true, + CacheType::Unsafe, + )]; _block_files = insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); // Add a net device. diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs index e6a25c09d80..ae5f30768f3 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs @@ -415,6 +415,7 @@ mod tests { use crate::memory_snapshot::SnapshotMemory; use crate::version_map::{FC_VERSION_TO_SNAP_VERSION, VERSION_MAP}; use crate::vmm_config::balloon::BalloonDeviceConfig; + use crate::vmm_config::drive::CacheType; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::vsock::tests::default_config; use crate::Vmm; @@ -441,7 +442,13 @@ mod tests { // Add a block device. let drive_id = String::from("root"); - let block_configs = vec![CustomBlockConfig::new(drive_id, true, None, true)]; + let block_configs = vec![CustomBlockConfig::new( + drive_id, + true, + None, + true, + CacheType::Unsafe, + )]; insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); // Add net device. diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index 4bb3819938f..8cb72248c2a 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -395,6 +395,7 @@ mod tests { path_on_host: tmp_file.as_path().to_str().unwrap().to_string(), is_root_device: false, partuuid: Some("0eaa91a0-01".to_string()), + cache_type: CacheType::Unsafe, is_read_only: false, rate_limiter: Some(RateLimiterConfig::default()), }, diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index 33327f07601..18e03ff0b55 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -653,6 +653,7 @@ impl RuntimeApiController { mod tests { use super::*; use crate::vmm_config::balloon::BalloonBuilder; + use crate::vmm_config::drive::CacheType; use crate::vmm_config::logger::LoggerLevel; use crate::vmm_config::vsock::VsockBuilder; use devices::virtio::balloon::{BalloonConfig, Error as BalloonError}; @@ -1052,6 +1053,7 @@ mod tests { path_on_host: String::new(), is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::new(), rate_limiter: None, @@ -1065,6 +1067,7 @@ mod tests { path_on_host: String::new(), is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::new(), rate_limiter: None, @@ -1498,6 +1501,7 @@ mod tests { path_on_host: String::new(), is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::new(), rate_limiter: None, @@ -1587,6 +1591,7 @@ mod tests { path_on_host: String::new(), is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::new(), rate_limiter: None, diff --git a/src/vmm/src/version_map.rs b/src/vmm/src/version_map.rs index 9f83ed7513b..1f9fdff296e 100644 --- a/src/vmm/src/version_map.rs +++ b/src/vmm/src/version_map.rs @@ -6,6 +6,7 @@ use std::collections::HashMap; use crate::device_manager::persist::DeviceStates; +use devices::virtio::block::persist::BlockState; use lazy_static::lazy_static; use versionize::VersionMap; @@ -17,6 +18,7 @@ lazy_static! { pub static ref VERSION_MAP: VersionMap = { let mut version_map = VersionMap::new(); version_map.new_version().set_type_version(DeviceStates::type_id(), 2); + version_map.new_version().set_type_version(BlockState::type_id(), 2); version_map }; @@ -26,6 +28,7 @@ lazy_static! { let mut mapping = HashMap::new(); mapping.insert(String::from("0.23.0"), 1); mapping.insert(String::from("0.24.0"), 2); + mapping.insert(String::from("0.25.0"), 3); mapping }; diff --git a/src/vmm/src/vmm_config/drive.rs b/src/vmm/src/vmm_config/drive.rs index 15effb44b8d..66fd0f084fa 100644 --- a/src/vmm/src/vmm_config/drive.rs +++ b/src/vmm/src/vmm_config/drive.rs @@ -13,6 +13,8 @@ use super::RateLimiterConfig; use crate::Error as VmmError; use devices::virtio::Block; +pub use devices::virtio::CacheType; + use serde::Deserialize; type Result = result::Result; @@ -79,6 +81,10 @@ pub struct BlockDeviceConfig { /// If set to true, the drive is opened in read-only mode. Otherwise, the /// drive is opened as read-write. pub is_read_only: bool, + /// If set to true, the drive will ignore flush requests coming from + /// the guest driver. + #[serde(default = "CacheType::default")] + pub cache_type: CacheType, /// Rate Limiter for I/O operations. pub rate_limiter: Option, } @@ -189,6 +195,7 @@ impl BlockBuilder { devices::virtio::Block::new( block_device_config.drive_id, block_device_config.partuuid, + block_device_config.cache_type, block_device_config.path_on_host, block_device_config.is_read_only, block_device_config.is_root_device, @@ -218,6 +225,7 @@ mod tests { path_on_host: self.path_on_host.clone(), is_root_device: self.is_root_device, partuuid: self.partuuid.clone(), + cache_type: self.cache_type, is_read_only: self.is_read_only, drive_id: self.drive_id.clone(), rate_limiter: None, @@ -240,6 +248,7 @@ mod tests { path_on_host: dummy_path, is_root_device: false, partuuid: None, + cache_type: CacheType::Writeback, is_read_only: false, drive_id: dummy_id.clone(), rate_limiter: None, @@ -269,6 +278,7 @@ mod tests { path_on_host: dummy_path, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: true, drive_id: String::from("1"), rate_limiter: None, @@ -295,6 +305,7 @@ mod tests { path_on_host: dummy_path_1, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("1"), rate_limiter: None, @@ -306,6 +317,7 @@ mod tests { path_on_host: dummy_path_2, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("2"), rate_limiter: None, @@ -328,6 +340,7 @@ mod tests { path_on_host: dummy_path_1, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("1"), rate_limiter: None, @@ -339,6 +352,7 @@ mod tests { path_on_host: dummy_path_2, is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("2"), rate_limiter: None, @@ -350,6 +364,7 @@ mod tests { path_on_host: dummy_path_3, is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("3"), rate_limiter: None, @@ -386,6 +401,7 @@ mod tests { path_on_host: dummy_path_1, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("1"), rate_limiter: None, @@ -397,6 +413,7 @@ mod tests { path_on_host: dummy_path_2, is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("2"), rate_limiter: None, @@ -408,6 +425,7 @@ mod tests { path_on_host: dummy_path_3, is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("3"), rate_limiter: None, @@ -445,6 +463,7 @@ mod tests { path_on_host: dummy_path_1.clone(), is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("1"), rate_limiter: None, @@ -456,6 +475,7 @@ mod tests { path_on_host: dummy_path_2.clone(), is_root_device: false, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("2"), rate_limiter: None, @@ -514,6 +534,7 @@ mod tests { path_on_host: dummy_path_1, is_root_device: true, partuuid: None, + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("1"), rate_limiter: None, @@ -525,6 +546,7 @@ mod tests { path_on_host: dummy_path_2, is_root_device: true, partuuid: Some("0eaa91a0-01".to_string()), + cache_type: CacheType::Unsafe, is_read_only: false, drive_id: String::from("2"), rate_limiter: None, @@ -548,6 +570,7 @@ mod tests { path_on_host: dummy_block_file.as_path().to_str().unwrap().to_string(), is_root_device: false, partuuid: Some("0eaa91a0-01".to_string()), + cache_type: CacheType::Unsafe, is_read_only: true, rate_limiter: None, }; diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 0ce4b15ea7d..7b86286e0d7 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -611,6 +611,7 @@ def add_drive( root_device=False, is_read_only=False, partuuid=None, + cache_type=None, ): """Add a block device.""" response = self.drive.put( @@ -618,7 +619,8 @@ def add_drive( path_on_host=self.create_jailed_resource(file_path), is_root_device=root_device, is_read_only=is_read_only, - partuuid=partuuid + partuuid=partuuid, + cache_type=cache_type ) assert self.api_session.is_status_no_content(response.status_code) diff --git a/tests/framework/resources.py b/tests/framework/resources.py index cba6cc150f4..72a7cf44069 100644 --- a/tests/framework/resources.py +++ b/tests/framework/resources.py @@ -231,7 +231,8 @@ def create_json( is_root_device=None, partuuid=None, is_read_only=None, - rate_limiter=None): + rate_limiter=None, + cache_type=None): """Compose the json associated to this type of API request.""" datax = {} @@ -250,6 +251,9 @@ def create_json( if is_read_only is not None: datax['is_read_only'] = is_read_only + if cache_type is not None: + datax['cache_type'] = cache_type + if rate_limiter is not None: datax['rate_limiter'] = rate_limiter diff --git a/tests/integration_tests/build/test_coverage.py b/tests/integration_tests/build/test_coverage.py index b71b5a63bb0..b263088e177 100644 --- a/tests/integration_tests/build/test_coverage.py +++ b/tests/integration_tests/build/test_coverage.py @@ -23,7 +23,7 @@ # this contains the frequency while on AMD it does not. # Checkout the cpuid crate. In the future other # differences may appear. -COVERAGE_DICT = {"Intel": 85.44, "AMD": 84.77, "ARM": 83.42} +COVERAGE_DICT = {"Intel": 85.44, "AMD": 84.74, "ARM": 83.60} PROC_MODEL = proc.proc_type() diff --git a/tests/integration_tests/functional/test_drives.py b/tests/integration_tests/functional/test_drives.py index a3d0556c70a..9b5d627ce72 100644 --- a/tests/integration_tests/functional/test_drives.py +++ b/tests/integration_tests/functional/test_drives.py @@ -10,6 +10,7 @@ import host_tools.drive as drive_tools import host_tools.network as net_tools # pylint: disable=import-error +import host_tools.logging as log_tools PARTUUID = {"x86_64": "0eaa91a0-01", "aarch64": "7bf14469-01"} MB = 1024 * 1024 @@ -337,6 +338,134 @@ def test_patch_drive(test_microvm_with_ssh, network_config): assert stdout.readline().strip() == size_bytes_str +def test_no_flush(test_microvm_with_ssh, network_config): + """Verify default block ignores flush.""" + test_microvm = test_microvm_with_ssh + test_microvm.spawn() + + test_microvm.basic_config( + vcpu_count=1, + add_root_device=False + ) + + _tap, _, _ = test_microvm.ssh_network_config(network_config, '1') + + # Add the block device + test_microvm.add_drive( + 'rootfs', + test_microvm.rootfs_file, + root_device=True, + ) + + # Configure the metrics. + metrics_fifo_path = os.path.join(test_microvm.path, 'metrics_fifo') + metrics_fifo = log_tools.Fifo(metrics_fifo_path) + response = test_microvm.metrics.put( + metrics_path=test_microvm.create_jailed_resource(metrics_fifo.path) + ) + assert test_microvm.api_session.is_status_no_content(response.status_code) + + test_microvm.start() + + # Verify all flush commands were ignored during boot. + fc_metrics = test_microvm.flush_metrics(metrics_fifo) + assert fc_metrics['block']['flush_count'] == 0 + + # Have the guest drop the caches to generate flush requests. + ssh_connection = net_tools.SSHConnection(test_microvm.ssh_config) + cmd = "sync; echo 1 > /proc/sys/vm/drop_caches" + _, _, stderr = ssh_connection.execute_command(cmd) + assert stderr.read() == '' + + # Verify all flush commands were ignored even after + # dropping the caches. + fc_metrics = test_microvm.flush_metrics(metrics_fifo) + assert fc_metrics['block']['flush_count'] == 0 + + +def test_flush(test_microvm_with_ssh, network_config): + """Verify block with flush actually flushes.""" + test_microvm = test_microvm_with_ssh + test_microvm.spawn() + + test_microvm.basic_config( + vcpu_count=1, + add_root_device=False + ) + + _tap, _, _ = test_microvm.ssh_network_config(network_config, '1') + + # Add the block device with explicitly enabling flush. + test_microvm.add_drive( + 'rootfs', + test_microvm.rootfs_file, + root_device=True, + cache_type="Writeback", + ) + + # Configure metrics, to get later the `flush_count`. + metrics_fifo_path = os.path.join(test_microvm.path, 'metrics_fifo') + metrics_fifo = log_tools.Fifo(metrics_fifo_path) + response = test_microvm.metrics.put( + metrics_path=test_microvm.create_jailed_resource(metrics_fifo.path) + ) + assert test_microvm.api_session.is_status_no_content(response.status_code) + + test_microvm.start() + + # Have the guest drop the caches to generate flush requests. + ssh_connection = net_tools.SSHConnection(test_microvm.ssh_config) + cmd = "sync; echo 1 > /proc/sys/vm/drop_caches" + _, _, stderr = ssh_connection.execute_command(cmd) + assert stderr.read() == '' + + # On average, dropping the caches right after boot generates + # about 6 block flush requests. + fc_metrics = test_microvm.flush_metrics(metrics_fifo) + assert fc_metrics['block']['flush_count'] > 0 + + +def test_block_default_cache_old_version(test_microvm_with_ssh): + """Verify that saving a snapshot for old versions works correctly.""" + test_microvm = test_microvm_with_ssh + test_microvm.spawn() + + test_microvm.basic_config( + vcpu_count=1, + add_root_device=False + ) + + # Add the block device with explicitly enabling flush. + test_microvm.add_drive( + 'rootfs', + test_microvm.rootfs_file, + root_device=True, + cache_type="Writeback", + ) + + test_microvm.start() + + # Pause the VM to create the snapshot. + response = test_microvm.vm.patch(state='Paused') + assert test_microvm.api_session.is_status_no_content(response.status_code) + + # Create the snapshot for a version without block cache type. + response = test_microvm.snapshot.create( + mem_file_path='memfile', + snapshot_path='snapsfile', + diff=False, + version='0.24.0' + ) + assert test_microvm.api_session.is_status_no_content(response.status_code) + + # We should find a warning in the logs for this case as this + # cache type was not supported in 0.24.0 and we should default + # to "Unsafe" mode. + log_data = test_microvm.log_data + assert "Target version does not implement the current cache type. "\ + "Defaulting to \"unsafe\" mode." in log_data + + def check_iops_limit(ssh_connection, block_size, count, min_time, max_time): """Verify if the rate limiter throttles block iops using dd.""" byte_count = block_size * count