diff --git a/vhost-user-backend/CHANGELOG.md b/vhost-user-backend/CHANGELOG.md
index 5c24f43d..bd835f78 100644
--- a/vhost-user-backend/CHANGELOG.md
+++ b/vhost-user-backend/CHANGELOG.md
@@ -2,6 +2,7 @@
 ## [Unreleased]
 
 ### Added
+[#206](https://github.com/rust-vmm/vhost/pull/206) Add bitmap support for tracking dirty pages during migration
 
 ### Changed
 
diff --git a/vhost-user-backend/src/bitmap.rs b/vhost-user-backend/src/bitmap.rs
new file mode 100644
index 00000000..a9864b13
--- /dev/null
+++ b/vhost-user-backend/src/bitmap.rs
@@ -0,0 +1,632 @@
+// Copyright (C) 2024 Red Hat, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+use std::ops::Index;
+use std::os::fd::{AsRawFd, BorrowedFd};
+use std::sync::atomic::{AtomicU8, Ordering};
+use std::sync::{Arc, RwLock};
+use std::{io, ptr};
+use vm_memory::bitmap::{Bitmap, BitmapSlice, WithBitmapSlice};
+use vm_memory::mmap::NewBitmap;
+use vm_memory::{Address, GuestMemoryRegion};
+
+// Size in bytes of `VHOST_LOG_PAGE`
+const LOG_PAGE_SIZE: usize = 0x1000;
+// Number of bits grouped together as a basic storage unit ("word") in the bitmap
+// (i.e., in this case one byte tracks 8 pages, one bit per page).
+const LOG_WORD_SIZE: usize = u8::BITS as usize;
+
+/// A `Bitmap` with an internal `Bitmap` that can be replaced at runtime
+pub trait BitmapReplace: Bitmap {
+    type InnerBitmap: MemRegionBitmap;
+
+    /// Replace the internal `Bitmap`
+    fn replace(&self, bitmap: Self::InnerBitmap);
+}
+
+/// A bitmap relative to a memory region
+pub trait MemRegionBitmap: Sized {
+    /// Creates a new bitmap relative to `region`, using the `logmem` as
+    /// backing memory for the bitmap
+    fn new<R: GuestMemoryRegion>(region: &R, logmem: Arc<MmapLogReg>) -> io::Result<Self>;
+}
+
+// TODO: This impl is a quick and dirty hack to allow the tests to continue using
+// `GuestMemoryMmap<()>`. Sadly this is exposed in the public API, but it should
+// be moved to an internal mock library.
+impl BitmapReplace for () {
+    type InnerBitmap = ();
+
+    // This implementation must not be used if the backend sets `VHOST_USER_PROTOCOL_F_LOG_SHMFD`.
+    fn replace(&self, _bitmap: ()) {
+        panic!("The unit bitmap () must not be used if VHOST_USER_PROTOCOL_F_LOG_SHMFD is set");
+    }
+}
+
+impl MemRegionBitmap for () {
+    fn new<R: GuestMemoryRegion>(_region: &R, _logmem: Arc<MmapLogReg>) -> io::Result<Self> {
+        Err(io::Error::from(io::ErrorKind::Unsupported))
+    }
+}
+
+/// `BitmapMmapRegion` implements a bitmap that can be replaced at runtime.
+/// The main use case is to support live migration on vhost-user backends
+/// (see `VHOST_USER_PROTOCOL_F_LOG_SHMFD` and `VHOST_USER_SET_LOG_BASE` in the vhost-user protocol
+/// specification). It uses a fixed memory page size of `VHOST_LOG_PAGE` bytes (i.e., `4096` bytes),
+/// so it converts addresses to page numbers before setting or clearing the bits.
+///
+/// To use this bitmap you need to define the guest memory as `GuestMemoryMmap<BitmapMmapRegion>`.
+///
+/// Note:
+/// This implementation uses `std::sync::RwLock`. The priority policy of the lock is dependent on
+/// the underlying operating system's implementation and does not guarantee any particular policy;
+/// on systems other than Linux a thread trying to acquire the lock may starve.
+#[derive(Default, Debug, Clone)]
+pub struct BitmapMmapRegion {
+    // TODO: To avoid both reader and writer starvation we can replace the `std::sync::RwLock` with
+    // `parking_lot::RwLock`.
+    inner: Arc<RwLock<Option<AtomicBitmapMmap>>>,
+    base_address: usize, // The slice's base address
+}
+
+impl Bitmap for BitmapMmapRegion {
+    fn mark_dirty(&self, offset: usize, len: usize) {
+        let inner = self.inner.read().unwrap();
+        if let Some(bitmap) = inner.as_ref() {
+            if let Some(absolute_offset) = self.base_address.checked_add(offset) {
+                bitmap.mark_dirty(absolute_offset, len);
+            }
+        }
+    }
+
+    fn dirty_at(&self, offset: usize) -> bool {
+        let inner = self.inner.read().unwrap();
+        inner
+            .as_ref()
+            .is_some_and(|bitmap| bitmap.dirty_at(self.base_address.saturating_add(offset)))
+    }
+
+    fn slice_at(&self, offset: usize) -> <Self as WithBitmapSlice>::S {
+        Self {
+            inner: Arc::clone(&self.inner),
+            base_address: self.base_address.saturating_add(offset),
+        }
+    }
+}
+
+impl BitmapReplace for BitmapMmapRegion {
+    type InnerBitmap = AtomicBitmapMmap;
+
+    fn replace(&self, bitmap: AtomicBitmapMmap) {
+        let mut inner = self.inner.write().unwrap();
+        inner.replace(bitmap);
+    }
+}
+
+impl BitmapSlice for BitmapMmapRegion {}
+
+impl<'a> WithBitmapSlice<'a> for BitmapMmapRegion {
+    type S = Self;
+}
+
+impl NewBitmap for BitmapMmapRegion {
+    fn with_len(_len: usize) -> Self {
+        Self::default()
+    }
+}
+
+/// `AtomicBitmapMmap` implements a simple memory-mapped bitmap on the page level with test
+/// and set operations. The main use case is to support live migration on vhost-user backends
+/// (see `VHOST_USER_PROTOCOL_F_LOG_SHMFD` and `VHOST_USER_SET_LOG_BASE` in the vhost-user protocol
+/// specification). It uses a fixed memory page size of `LOG_PAGE_SIZE` bytes, so it converts
+/// addresses to page numbers before setting or clearing the bits.
+#[derive(Debug)]
+pub struct AtomicBitmapMmap {
+    logmem: Arc<MmapLogReg>,
+    pages_before_region: usize, // Number of pages to ignore from the start of the bitmap
+    number_of_pages: usize,     // Number of total pages indexed in the bitmap for this region
+}
+
+// `AtomicBitmapMmap` implements a simple bitmap; it is page-size aware and relative
+// to a memory region, and it handles the `log` memory-mapped area. Each page is indexed
+// inside a word of `LOG_WORD_SIZE` bits, so even if the bitmap starts at the beginning of
+// the mapped area, the memory region does not necessarily have to start at the beginning of
+// that word.
+// Note: we don't implement `Bitmap` because we cannot implement `slice_at()`.
+impl MemRegionBitmap for AtomicBitmapMmap {
+    // Creates a new memory-mapped bitmap for the memory region. This bitmap must fit within the
+    // log mapped memory.
+    fn new<R: GuestMemoryRegion>(region: &R, logmem: Arc<MmapLogReg>) -> io::Result<Self> {
+        let region_start_addr: usize = region.start_addr().raw_value().io_try_into()?;
+        let region_len: usize = region.len().io_try_into()?;
+        if region_len == 0 {
+            return Err(io::Error::from(io::ErrorKind::InvalidData));
+        }
+
+        // The size of the log should be large enough to cover all known guest addresses.
+        let region_end_addr = region_start_addr
+            .checked_add(region_len - 1)
+            .ok_or(io::Error::from(io::ErrorKind::InvalidData))?;
+        let region_end_log_word = page_word(page_number(region_end_addr));
+        if region_end_log_word >= logmem.len() {
+            return Err(io::Error::from(io::ErrorKind::InvalidData));
+        }
+
+        // The frontend sends a single bitmap (i.e., the log memory to be mapped using `fd`,
+        // `mmap_offset` and `mmap_size`) that covers the entire guest memory.
+        // However, since each memory region requires a bitmap relative to it, we have to
+        // adjust the offset and size, in number of pages, of this region.
+        let offset_pages = page_number(region_start_addr);
+        let size_page = page_number(region_len);
+
+        Ok(Self {
+            logmem,
+            pages_before_region: offset_pages,
+            number_of_pages: size_page,
+        })
+    }
+}
+
+impl AtomicBitmapMmap {
+    // Sets the memory range as dirty. The `offset` is relative to the memory region,
+    // so an offset of `0` references the start of the memory region. Any attempt to
+    // access beyond the end of the bitmap is simply ignored.
+    fn mark_dirty(&self, offset: usize, len: usize) {
+        if len == 0 {
+            return;
+        }
+
+        let first_page = page_number(offset);
+        let last_page = page_number(offset.saturating_add(len - 1));
+        for page in first_page..=last_page {
+            if page >= self.number_of_pages {
+                break; // ignore out-of-bounds access
+            }
+
+            // get the absolute page number
+            let page = self.pages_before_region + page;
+            self.logmem[page_word(page)].fetch_or(1 << page_bit(page), Ordering::Relaxed);
+        }
+    }
+
+    // Check whether the specified offset is marked as dirty. The `offset` is relative
+    // to the memory region, so a `0` offset references the start of the memory region.
+    // Any attempt to access beyond the end of the bitmap is simply ignored.
+    fn dirty_at(&self, offset: usize) -> bool {
+        let page = page_number(offset);
+        if page >= self.number_of_pages {
+            return false; // ignore out-of-bounds access
+        }
+
+        // get the absolute page number
+        let page = self.pages_before_region + page;
+        let page_bit = self.logmem[page_word(page)].load(Ordering::Relaxed) & (1 << page_bit(page));
+        page_bit != 0
+    }
+}
+
+/// `MmapLogReg` mmaps the frontend bitmap backing memory in the current process.
+#[derive(Debug)]
+pub struct MmapLogReg {
+    addr: *const AtomicU8,
+    len: usize,
+}
+
+// SAFETY: Send is not automatically implemented because of the raw pointer.
+// No one besides `MmapLogReg` has the raw pointer, so we can safely transfer it to another thread.
+unsafe impl Send for MmapLogReg {}
+
+// SAFETY: Sync is not automatically implemented because of the raw pointer.
+// `MmapLogReg` doesn't have any interior mutability and all accesses to `&AtomicU8`
+// are done through atomic operations.
+unsafe impl Sync for MmapLogReg {}
+
+impl MmapLogReg {
+    // Note: We could try to adjust the mapping area to only cover the memory region, but
+    // the region's starting address is not guaranteed to be LOG_WORD_SIZE-page aligned,
+    // which makes the implementation needlessly cumbersome.
+    // Note: The specification does not define whether the offset must be page-aligned or not,
+    // but since we are receiving the offset from the frontend to be used to call mmap,
+    // we assume it is properly aligned (currently, QEMU always sends a 0 offset).
+    pub(crate) fn from_file(fd: BorrowedFd, offset: u64, len: u64) -> io::Result<Self> {
+        let offset: isize = offset.io_try_into()?;
+        let len: usize = len.io_try_into()?;
+
+        // Let's uphold the safety contract for `std::ptr::offset()`.
+        if len > isize::MAX as usize {
+            return Err(io::Error::from(io::ErrorKind::InvalidData));
+        }
+
+        // SAFETY: `fd` is a valid file descriptor and we are not using `libc::MAP_FIXED`.
+        let addr = unsafe {
+            libc::mmap(
+                ptr::null_mut(),
+                len as libc::size_t,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_SHARED,
+                fd.as_raw_fd(),
+                offset as libc::off_t,
+            )
+        };
+
+        if addr == libc::MAP_FAILED {
+            return Err(io::Error::last_os_error());
+        }
+
+        Ok(Self {
+            addr: addr as *const AtomicU8,
+            len,
+        })
+    }
+
+    fn len(&self) -> usize {
+        self.len
+    }
+}
+
+impl Index<usize> for MmapLogReg {
+    type Output = AtomicU8;
+
+    // It's ok to get a reference to an atomic value.
+    fn index(&self, index: usize) -> &Self::Output {
+        assert!(index < self.len);
+        // Note: Instead of `&*` we can use `AtomicU8::from_ptr()` as soon as it gets stabilized.
+        // SAFETY: `self.addr` is a valid and properly aligned pointer. Also, `self.addr` + `index`
+        // doesn't wrap around and is contained within the mapped memory region.
+        unsafe { &*self.addr.add(index) }
+    }
+}
+
+impl Drop for MmapLogReg {
+    fn drop(&mut self) {
+        // SAFETY: `addr` is properly aligned; also, we are sure that this is the
+        // last reference alive and/or we have exclusive access to this object.
+        unsafe {
+            libc::munmap(self.addr as *mut libc::c_void, self.len as libc::size_t);
+        }
+    }
+}
+
+trait IoTryInto<TyDst: TryFrom<Self>>: Sized {
+    fn io_try_into(self) -> io::Result<TyDst>;
+}
+
+impl<TySrc, TyDst> IoTryInto<TyDst> for TySrc
+where
+    TyDst: TryFrom<TySrc>,
+    <TyDst as TryFrom<TySrc>>::Error: Send + Sync + std::error::Error + 'static,
+{
+    fn io_try_into(self) -> io::Result<TyDst> {
+        self.try_into()
+            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+    }
+}
+
+#[inline]
+// Get the page number corresponding to the address `addr`
+fn page_number(addr: usize) -> usize {
+    addr / LOG_PAGE_SIZE
+}
+
+#[inline]
+// Get the word within the bitmap that contains the page.
+// Each page is indexed inside a word of `LOG_WORD_SIZE` bits.
+fn page_word(page: usize) -> usize {
+    page / LOG_WORD_SIZE
+}
+
+#[inline]
+// Get the bit index inside a word of `LOG_WORD_SIZE` bits
+fn page_bit(page: usize) -> usize {
+    page % LOG_WORD_SIZE
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs::File;
+    use std::io::Write;
+    use std::os::fd::AsFd;
+    use vm_memory::{GuestAddress, GuestRegionMmap};
+    use vmm_sys_util::tempfile::TempFile;
+
+    // Helper method to check whether a specified range is clean.
+    pub fn range_is_clean<B: Bitmap>(b: &B, start: usize, len: usize) -> bool {
+        (start..start + len).all(|offset| !b.dirty_at(offset))
+    }
+
+    // Helper method to check whether a specified range is dirty.
+    pub fn range_is_dirty<B: Bitmap>(b: &B, start: usize, len: usize) -> bool {
+        (start..start + len).all(|offset| b.dirty_at(offset))
+    }
+
+    fn tmp_file(len: usize) -> File {
+        let mut f = TempFile::new().unwrap().into_file();
+        let buf = vec![0; len];
+        f.write_all(buf.as_ref()).unwrap();
+        f
+    }
+
+    fn test_all(b: &BitmapMmapRegion, len: usize) {
+        assert!(range_is_clean(b, 0, len), "The bitmap should be clean");
+
+        b.mark_dirty(0, len);
+        assert!(range_is_dirty(b, 0, len), "The bitmap should be dirty");
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_region_bigger_than_log() {
+        // Let's create a log memory area to track 8 pages;
+        // since 1 bit corresponds to 1 page, we need a 1-byte log memory area.
+        let mmap_offset: u64 = 0;
+        let mmap_size = 1; // 1 byte = 8 bits/pages
+        let f = tmp_file(mmap_size);
+
+        // A guest memory region of 16 pages
+        let region_start_addr = GuestAddress(mmap_offset);
+        let region_len = LOG_PAGE_SIZE * 16;
+        let region: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap();
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        let log = AtomicBitmapMmap::new(&region, logmem);
+
+        assert!(log.is_err());
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_log_and_region_same_size() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        // A 32-page guest memory region
+        let region_start_addr = GuestAddress::new(mmap_offset);
+        let region_len = LOG_PAGE_SIZE * 32;
+        let region: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap();
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        let log = AtomicBitmapMmap::new(&region, logmem);
+        assert!(log.is_ok());
+        let log = log.unwrap();
+
+        let bitmap = BitmapMmapRegion::default();
+        bitmap.replace(log);
+
+        test_all(&bitmap, region_len);
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_region_smaller_than_log() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        // A 16-page guest memory region
+        let region_start_addr = GuestAddress::new(mmap_offset);
+        let region_len = LOG_PAGE_SIZE * 16;
+        let region: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap();
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        let log = AtomicBitmapMmap::new(&region, logmem);
+        assert!(log.is_ok());
+        let log = log.unwrap();
+
+        let bitmap = BitmapMmapRegion::default();
+
+        bitmap.replace(log);
+
+        test_all(&bitmap, region_len);
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_region_smaller_than_one_word() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        // A 6-page guest memory region
+        let region_start_addr = GuestAddress::new(mmap_offset);
+        let region_len = LOG_PAGE_SIZE * 6;
+        let region: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap();
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        let log = AtomicBitmapMmap::new(&region, logmem);
+        assert!(log.is_ok());
+        let log = log.unwrap();
+
+        let bitmap = BitmapMmapRegion::default();
+        bitmap.replace(log);
+
+        test_all(&bitmap, region_len);
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_two_regions_overlapping_word_first_dirty() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        // An 11-page guest memory region
+        let region0_start_addr = GuestAddress::new(mmap_offset);
+        let region0_len = LOG_PAGE_SIZE * 11;
+        let region0: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region0_start_addr, region0_len, None).unwrap();
+
+        let log0 = AtomicBitmapMmap::new(&region0, Arc::clone(&logmem));
+        assert!(log0.is_ok());
+        let log0 = log0.unwrap();
+        let bitmap0 = BitmapMmapRegion::default();
+        bitmap0.replace(log0);
+
+        // A 1-page guest memory region
+        let region1_start_addr = GuestAddress::new(mmap_offset + LOG_PAGE_SIZE as u64 * 14);
+        let region1_len = LOG_PAGE_SIZE;
+        let region1: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region1_start_addr, region1_len, None).unwrap();
+
+        let log1 = AtomicBitmapMmap::new(&region1, Arc::clone(&logmem));
+        assert!(log1.is_ok());
+        let log1 = log1.unwrap();
+
+        let bitmap1 = BitmapMmapRegion::default();
+        bitmap1.replace(log1);
+
+        // Both regions should be clean
+        assert!(
+            range_is_clean(&bitmap0, 0, region0_len),
+            "The bitmap0 should be clean"
+        );
+        assert!(
+            range_is_clean(&bitmap1, 0, region1_len),
+            "The bitmap1 should be clean"
+        );
+
+        // After marking region 0, region 1 should remain clean
+        bitmap0.mark_dirty(0, region0_len);
+
+        assert!(
+            range_is_dirty(&bitmap0, 0, region0_len),
+            "The bitmap0 should be dirty"
+        );
+        assert!(
+            range_is_clean(&bitmap1, 0, region1_len),
+            "The bitmap1 should be clean"
+        );
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_two_regions_overlapping_word_second_dirty() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        // An 11-page guest memory region
+        let region0_start_addr = GuestAddress::new(mmap_offset);
+        let region0_len = LOG_PAGE_SIZE * 11;
+        let region0: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region0_start_addr, region0_len, None).unwrap();
+
+        let log0 = AtomicBitmapMmap::new(&region0, Arc::clone(&logmem));
+        assert!(log0.is_ok());
+        let log0 = log0.unwrap();
+
+        let bitmap0 = BitmapMmapRegion::default();
+        bitmap0.replace(log0);
+
+        // A 1-page guest memory region
+        let region1_start_addr = GuestAddress::new(mmap_offset + LOG_PAGE_SIZE as u64 * 14);
+        let region1_len = LOG_PAGE_SIZE;
+        let region1: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region1_start_addr, region1_len, None).unwrap();
+
+        let log1 = AtomicBitmapMmap::new(&region1, Arc::clone(&logmem));
+        assert!(log1.is_ok());
+        let log1 = log1.unwrap();
+
+        let bitmap1 = BitmapMmapRegion::default();
+        bitmap1.replace(log1);
+
+        // Both regions should be clean
+        assert!(
+            range_is_clean(&bitmap0, 0, region0_len),
+            "The bitmap0 should be clean"
+        );
+        assert!(
+            range_is_clean(&bitmap1, 0, region1_len),
+            "The bitmap1 should be clean"
+        );
+
+        // After marking region 1, region 0 should remain clean
+        bitmap1.mark_dirty(0, region1_len);
+
+        assert!(
+            range_is_dirty(&bitmap1, 0, region1_len),
+            "The bitmap1 should be dirty"
+        );
+        assert!(
+            range_is_clean(&bitmap0, 0, region0_len),
+            "The bitmap0 should be clean"
+        );
+    }
+
+    #[test]
+    #[cfg(not(miri))] // Miri cannot mmap files
+    fn test_bitmap_region_slice() {
+        // A log memory area able to track 32 pages
+        let mmap_offset: u64 = 0;
+        let mmap_size = 4; // 4 bytes * 8 bits = 32 bits/pages
+        let f = tmp_file(mmap_size);
+
+        // A 32-page guest memory region
+        let region_start_addr = GuestAddress::new(mmap_offset);
+        let region_len = LOG_PAGE_SIZE * 32;
+        let region: GuestRegionMmap<()> =
+            GuestRegionMmap::from_range(region_start_addr, region_len, None).unwrap();
+
+        let logmem =
+            Arc::new(MmapLogReg::from_file(f.as_fd(), mmap_offset, mmap_size as u64).unwrap());
+
+        let log = AtomicBitmapMmap::new(&region, logmem);
+        assert!(log.is_ok());
+        let log = log.unwrap();
+
+        let bitmap = BitmapMmapRegion::default();
+        bitmap.replace(log);
+
+        assert!(
+            range_is_clean(&bitmap, 0, region_len),
+            "The bitmap should be clean"
+        );
+
+        // Let's get a slice of half the bitmap
+        let slice_len = region_len / 2;
+        let slice = bitmap.slice_at(slice_len);
+        assert!(
+            range_is_clean(&slice, 0, slice_len),
+            "The slice should be clean"
+        );
+
+        slice.mark_dirty(0, slice_len);
+        assert!(
+            range_is_dirty(&slice, 0, slice_len),
+            "The slice should be dirty"
+        );
+        assert!(
+            range_is_clean(&bitmap, 0, slice_len),
+            "The first half of the bitmap should be clean"
+        );
+        assert!(
+            range_is_dirty(&bitmap, slice_len, region_len - slice_len),
+            "The second half of the bitmap should be dirty"
+        );
+    }
+}
diff --git a/vhost-user-backend/src/handler.rs b/vhost-user-backend/src/handler.rs
index c63bc935..62c4a66d 100644
--- a/vhost-user-backend/src/handler.rs
+++ b/vhost-user-backend/src/handler.rs
@@ -6,16 +6,18 @@
 use std::error;
 use std::fs::File;
 use std::io;
+use std::os::fd::AsFd;
 #[cfg(feature = "postcopy")]
 use std::os::fd::FromRawFd;
 use std::os::unix::io::AsRawFd;
 use std::sync::Arc;
 use std::thread;
 
+use crate::bitmap::{BitmapReplace, MemRegionBitmap, MmapLogReg};
 #[cfg(feature = "postcopy")]
 use userfaultfd::{Uffd, UffdBuilder};
 use vhost::vhost_user::message::{
-    VhostTransferStateDirection, VhostTransferStatePhase, VhostUserConfigFlags,
+    VhostTransferStateDirection, VhostTransferStatePhase, VhostUserConfigFlags, VhostUserLog,
     VhostUserMemoryRegion, VhostUserProtocolFeatures, VhostUserSingleMemoryRegion,
     VhostUserVirtioFeatures, VhostUserVringAddrFlags, VhostUserVringState,
 };
@@ -25,7 +27,10 @@ use vhost::vhost_user::{
 use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use virtio_queue::{Error as VirtQueError, QueueT};
 use vm_memory::mmap::NewBitmap;
-use vm_memory::{GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap};
+use vm_memory::{
+    GuestAddress, GuestAddressSpace, GuestMemory, GuestMemoryMmap, GuestMemoryRegion,
+    GuestRegionMmap,
+};
 use vmm_sys_util::epoll::EventSet;
 
 use super::backend::VhostUserBackend;
@@ -230,7 +235,7 @@ where
 impl<T: VhostUserBackend> VhostUserBackendReqHandlerMut for VhostUserHandler<T>
 where
-    T::Bitmap: NewBitmap + Clone,
+    T::Bitmap: BitmapReplace + NewBitmap + Clone,
 {
     fn set_owner(&mut self) -> VhostUserResult<()> {
         if self.owned {
@@ -711,6 +716,47 @@ where
         self.uffd = None;
         Ok(())
     }
+
+    // Sets logging (i.e., bitmap) shared memory space.
+    //
+    // During live migration, the front-end may need to track the modifications the back-end
+    // makes to the memory-mapped regions, so the back-end should mark the dirty pages in a log.
+    // Once it complies with this logging, it may declare the `VHOST_F_LOG_ALL` vhost feature.
+    //
+    // If the backend has the `VHOST_USER_PROTOCOL_F_LOG_SHMFD` protocol feature, it may receive
+    // the `VHOST_USER_SET_LOG_BASE` message. The log memory file descriptor is provided in `file`;
+    // the size and offset of the shared memory area are provided in the `VhostUserLog` message.
+    //
+    // See https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#migration.
+    // TODO: We ignore the `LOG_ALL` flag on `SET_FEATURES`, so we will continue marking pages as
+    // dirty even if the migration fails. We need to disable the logging after receiving a
+    // `SET_FEATURES` without the `LOG_ALL` flag.
+    fn set_log_base(&mut self, log: &VhostUserLog, file: File) -> VhostUserResult<()> {
+        let mem = self.atomic_mem.memory();
+
+        let logmem = Arc::new(
+            MmapLogReg::from_file(file.as_fd(), log.mmap_offset, log.mmap_size)
+                .map_err(VhostUserError::ReqHandlerError)?,
+        );
+
+        // Let's create all bitmaps first before replacing them, in case any of them fails
+        let mut bitmaps = Vec::new();
+        for region in mem.iter() {
+            let bitmap = <<T as VhostUserBackend>::Bitmap as BitmapReplace>::InnerBitmap::new(
+                region,
+                Arc::clone(&logmem),
+            )
+            .map_err(VhostUserError::ReqHandlerError)?;
+
+            bitmaps.push((region, bitmap));
+        }
+
+        for (region, bitmap) in bitmaps {
+            region.bitmap().replace(bitmap);
+        }
+
+        Ok(())
+    }
 }
 
 impl<T: VhostUserBackend> Drop for VhostUserHandler<T> {
diff --git a/vhost-user-backend/src/lib.rs b/vhost-user-backend/src/lib.rs
index 4bdf7c3c..d6cfbf96 100644
--- a/vhost-user-backend/src/lib.rs
+++ b/vhost-user-backend/src/lib.rs
@@ -28,6 +28,9 @@ pub use self::event_loop::VringEpollHandler;
 mod handler;
 pub use self::handler::VhostUserHandlerError;
 
+pub mod bitmap;
+use crate::bitmap::BitmapReplace;
+
 mod vring;
 pub use self::vring::{
     VringMutex, VringRwLock, VringState, VringStateGuard, VringStateMutGuard, VringT,
@@ -95,7 +98,7 @@ pub struct VhostUserDaemon<T: VhostUserBackend> {
 impl<T> VhostUserDaemon<T>
 where
     T: VhostUserBackend + Clone + 'static,
-    T::Bitmap: NewBitmap + Clone + Send + Sync,
+    T::Bitmap: BitmapReplace + NewBitmap + Clone + Send + Sync,
     T::Vring: Clone + Send + Sync,
 {
     /// Create the daemon instance, providing the backend implementation of `VhostUserBackend`.
diff --git a/vhost/CHANGELOG.md b/vhost/CHANGELOG.md
index 031e5f44..147438ad 100644
--- a/vhost/CHANGELOG.md
+++ b/vhost/CHANGELOG.md
@@ -2,6 +2,7 @@
 ## [Unreleased]
 
 ### Added
+[#206](https://github.com/rust-vmm/vhost/pull/206) Add bitmap support for tracking dirty pages during migration
 
 ### Changed
 
diff --git a/vhost/src/vhost_user/backend_req_handler.rs b/vhost/src/vhost_user/backend_req_handler.rs
index 238606e6..25ffd9c5 100644
--- a/vhost/src/vhost_user/backend_req_handler.rs
+++ b/vhost/src/vhost_user/backend_req_handler.rs
@@ -83,6 +83,7 @@ pub trait VhostUserBackendReqHandler {
     fn postcopy_listen(&self) -> Result<()>;
     #[cfg(feature = "postcopy")]
     fn postcopy_end(&self) -> Result<()>;
+    fn set_log_base(&self, log: &VhostUserLog, file: File) -> Result<()>;
 }
 
 /// Services provided to the frontend by the backend without interior mutability.
@@ -144,6 +145,7 @@ pub trait VhostUserBackendReqHandlerMut {
     fn postcopy_listen(&mut self) -> Result<()>;
     #[cfg(feature = "postcopy")]
    fn postcopy_end(&mut self) -> Result<()>;
+    fn set_log_base(&mut self, log: &VhostUserLog, file: File) -> Result<()>;
 }
 
 impl<T: VhostUserBackendReqHandlerMut> VhostUserBackendReqHandler for Mutex<T> {
@@ -282,6 +284,9 @@ impl<T: VhostUserBackendReqHandlerMut> VhostUserBackendReqHandler for Mutex<T> {
     fn postcopy_end(&self) -> Result<()> {
         self.lock().unwrap().postcopy_end()
     }
+    fn set_log_base(&self, log: &VhostUserLog, file: File) -> Result<()> {
+        self.lock().unwrap().set_log_base(log, file)
+    }
 }
 
 /// Server to handle service requests from frontends from the frontend communication channel.
@@ -649,6 +654,18 @@ impl<S: VhostUserBackendReqHandler> BackendReqHandler<S> {
                 let res = self.backend.postcopy_end();
                 self.send_ack_message(&hdr, res)?;
             }
+            // Sets logging shared memory space.
+            // When the back-end has the `VHOST_USER_PROTOCOL_F_LOG_SHMFD` protocol feature, the
+            // log memory `fd` is provided in the ancillary data of the `VHOST_USER_SET_LOG_BASE`
+            // message; the size and offset of the shared memory area are provided in the message.
+            // See https://qemu-project.gitlab.io/qemu/interop/vhost-user.html#migration.
+            Ok(FrontendReq::SET_LOG_BASE) => {
+                self.check_proto_feature(VhostUserProtocolFeatures::LOG_SHMFD)?;
+                let file = take_single_file(files).ok_or(Error::IncorrectFds)?;
+                let msg = self.extract_request_body::<VhostUserLog>(&hdr, size, &buf)?;
+                self.backend.set_log_base(&msg, file)?;
+                self.send_reply_message(&hdr, &msg)?;
+            }
             _ => {
                 return Err(Error::InvalidMessage);
             }
diff --git a/vhost/src/vhost_user/dummy_backend.rs b/vhost/src/vhost_user/dummy_backend.rs
index 9a2bb5b9..afdc73fb 100644
--- a/vhost/src/vhost_user/dummy_backend.rs
+++ b/vhost/src/vhost_user/dummy_backend.rs
@@ -326,4 +326,7 @@ impl VhostUserBackendReqHandlerMut for DummyBackendReqHandler {
     fn postcopy_end(&mut self) -> Result<()> {
         Ok(())
     }
+    fn set_log_base(&mut self, _log: &VhostUserLog, _file: File) -> Result<()> {
+        Err(Error::InvalidMessage)
+    }
 }
diff --git a/vhost/src/vhost_user/message.rs b/vhost/src/vhost_user/message.rs
index cc6b6753..f24331c2 100644
--- a/vhost/src/vhost_user/message.rs
+++ b/vhost/src/vhost_user/message.rs
@@ -394,6 +394,8 @@ bitflags! {
     #[derive(Copy, Clone, Debug, Eq, PartialEq)]
     /// Transport specific flags in VirtIO feature set defined by vhost-user.
     pub struct VhostUserVirtioFeatures: u64 {
+        /// Log dirtied shared memory pages.
+        const LOG_ALL = 0x400_0000;
         /// Feature flag for the protocol feature.
         const PROTOCOL_FEATURES = 0x4000_0000;
     }
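Usage note (illustrative only, not part of the patch): a minimal sketch of how a backend crate could plug the new `BitmapMmapRegion` into its guest memory type so that guest writes can be logged once the frontend sends `VHOST_USER_SET_LOG_BASE`. The region size, address and the final write are made up for the example; the bitmap replacement itself happens inside the handler's `set_log_base()`, not in user code.

use vhost_user_backend::bitmap::BitmapMmapRegion;
use vm_memory::{Bytes, GuestAddress, GuestMemoryMmap};

fn main() {
    // Guest memory whose regions carry a replaceable dirty-page bitmap
    // (the type parameter is the only wiring the backend has to do).
    let mem: GuestMemoryMmap<BitmapMmapRegion> =
        GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 4 * 0x1000)]).unwrap();

    // Until the frontend maps the log area, the inner bitmap is `None` and writes
    // are not recorded; after `set_log_base()` replaces it with an `AtomicBitmapMmap`,
    // the same write would set the corresponding page bit in the shared log.
    mem.write_obj(0xdead_beef_u32, GuestAddress(0x1000)).unwrap();
}

The daemon and request handler take care of receiving the log fd and calling `set_log_base()` once `VHOST_USER_PROTOCOL_F_LOG_SHMFD` has been negotiated.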
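Also illustrative only: the page/word/bit arithmetic used by `page_number()`, `page_word()` and `page_bit()` in bitmap.rs, checked on a concrete offset (the offset value is made up for the demo).

// With 4 KiB pages and 8 pages tracked per bitmap byte, guest offset 0x2345 lands in
// page 2, which lives in byte (word) 0 of the log, at bit 2.
fn main() {
    const LOG_PAGE_SIZE: usize = 0x1000;
    const LOG_WORD_SIZE: usize = 8;

    let offset = 0x2345usize;
    let page = offset / LOG_PAGE_SIZE; // page_number() -> 2
    let word = page / LOG_WORD_SIZE; // page_word() -> 0
    let bit = page % LOG_WORD_SIZE; // page_bit() -> 2
    assert_eq!((page, word, bit), (2, 0, 2));

    // Marking that page dirty is a single atomic OR of `1 << bit` into log byte `word`.
    let mask = 1u8 << bit;
    assert_eq!(mask, 0b0000_0100);
}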