Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix 16K host page support #4916

Merged
merged 4 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ and this project adheres to

- [#4921](https://github.com/firecracker-microvm/firecracker/pull/4921): Fixed
swagger `CpuConfig` definition to include missing aarch64-specific fields.
- [#4916](https://github.com/firecracker-microvm/firecracker/pull/4916): Fixed
`IovDeque` implementation to work with any host page size. This fixes
virtio-net device on non 4K host kernels.

## [1.10.1]

Expand Down
5 changes: 5 additions & 0 deletions src/firecracker/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use seccompiler::BpfThreadMap;
use utils::arg_parser::{ArgParser, Argument};
use utils::validators::validate_instance_id;
use vmm::arch::host_page_size;
use vmm::builder::StartMicrovmError;
use vmm::logger::{
debug, error, info, LoggerConfig, ProcessTimeReporter, StoreMetric, LOGGER, METRICS,
Expand Down Expand Up @@ -108,6 +109,10 @@
// Initialize the logger.
LOGGER.init().map_err(MainError::SetLogger)?;

// First call to this function updates the value to current
// host page size.
_ = host_page_size();

Check warning on line 115 in src/firecracker/src/main.rs

View check run for this annotation

Codecov / codecov/patch

src/firecracker/src/main.rs#L114-L115

Added lines #L114 - L115 were not covered by tests
// We need this so that we can reset terminal to canonical mode if panic occurs.
let stdin = io::stdin();

Expand Down
3 changes: 2 additions & 1 deletion src/vmm/src/arch/aarch64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ pub fn initrd_load_addr(
guest_mem: &GuestMemoryMmap,
initrd_size: usize,
) -> Result<u64, ConfigurationError> {
let round_to_pagesize = |size| (size + (super::PAGE_SIZE - 1)) & !(super::PAGE_SIZE - 1);
let round_to_pagesize =
ShadowCurse marked this conversation as resolved.
Show resolved Hide resolved
|size| (size + (super::GUEST_PAGE_SIZE - 1)) & !(super::GUEST_PAGE_SIZE - 1);
match GuestAddress(get_fdt_addr(guest_mem)).checked_sub(round_to_pagesize(initrd_size) as u64) {
Some(offset) => {
if guest_mem.address_in_range(offset) {
Expand Down
21 changes: 19 additions & 2 deletions src/vmm/src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// SPDX-License-Identifier: Apache-2.0

use std::fmt;
use std::sync::LazyLock;

use log::warn;
use serde::{Deserialize, Serialize};

/// Module for aarch64 related functionality.
Expand Down Expand Up @@ -52,8 +54,23 @@
pub size: usize,
}

/// Default (smallest) memory page size for the supported architectures.
pub const PAGE_SIZE: usize = 4096;
/// Default page size for the guest OS.
pub const GUEST_PAGE_SIZE: usize = 4096;

/// Get the size of the host page size.
pub fn host_page_size() -> usize {
/// Default page size for the host OS.
static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
// # Safety: Value always valid
let r = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
usize::try_from(r).unwrap_or_else(|_| {
warn!("Could not get host page size with sysconf, assuming default 4K host pages");
4096

Check warning on line 68 in src/vmm/src/arch/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/vmm/src/arch/mod.rs#L67-L68

Added lines #L67 - L68 were not covered by tests
})
});

*PAGE_SIZE
}

impl fmt::Display for DeviceType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down
2 changes: 1 addition & 1 deletion src/vmm/src/arch/x86_64/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ pub fn initrd_load_addr(
return Err(ConfigurationError::InitrdAddress);
}

let align_to_pagesize = |address| address & !(super::PAGE_SIZE - 1);
let align_to_pagesize = |address| address & !(super::GUEST_PAGE_SIZE - 1);
Ok(align_to_pagesize(lowmem_size - initrd_size) as u64)
}

Expand Down
4 changes: 2 additions & 2 deletions src/vmm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1305,7 +1305,7 @@ pub(crate) mod tests {
use crate::vstate::memory::GuestMemory;
let image = make_test_bin();

let mem_size: usize = image.len() * 2 + crate::arch::PAGE_SIZE;
let mem_size: usize = image.len() * 2 + crate::arch::GUEST_PAGE_SIZE;

let tempfile = TempFile::new().unwrap();
let mut tempfile = tempfile.into_file();
Expand Down Expand Up @@ -1344,7 +1344,7 @@ pub(crate) mod tests {
let tempfile = TempFile::new().unwrap();
let mut tempfile = tempfile.into_file();
tempfile.write_all(&image).unwrap();
let gm = single_region_mem_at(crate::arch::PAGE_SIZE as u64 + 1, image.len() * 2);
let gm = single_region_mem_at(crate::arch::GUEST_PAGE_SIZE as u64 + 1, image.len() * 2);

let res = load_initrd(&gm, &mut tempfile);
assert!(
Expand Down
60 changes: 39 additions & 21 deletions src/vmm/src/devices/virtio/iov_deque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::os::fd::AsRawFd;
use libc::{c_int, c_void, iovec, off_t, size_t};
use memfd;

use crate::arch::PAGE_SIZE;
use crate::arch::host_page_size;

#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum IovDequeError {
Expand Down Expand Up @@ -77,10 +77,7 @@ pub enum IovDequeError {
// pub iov_len: ::size_t,
// }
// ```
//
// This value must be a multiple of 256 because this is the maximum number of `iovec` can fit into
// 1 memory page: 256 * sizeof(iovec) == 4096 == PAGE_SIZE. IovDeque only operates with `PAGE_SIZE`
ShadowCurse marked this conversation as resolved.
Show resolved Hide resolved
// granularity.

#[derive(Debug)]
pub struct IovDeque<const L: u16> {
pub iov: *mut libc::iovec,
Expand All @@ -92,18 +89,15 @@ pub struct IovDeque<const L: u16> {
unsafe impl<const L: u16> Send for IovDeque<L> {}

impl<const L: u16> IovDeque<L> {
const BYTES: usize = L as usize * std::mem::size_of::<iovec>();
const _ASSERT: () = assert!(Self::BYTES % PAGE_SIZE == 0);

/// Create a [`memfd`] object that represents a single physical page
fn create_memfd() -> Result<memfd::Memfd, IovDequeError> {
fn create_memfd(pages_bytes: usize) -> Result<memfd::Memfd, IovDequeError> {
// Create a sealable memfd.
let opts = memfd::MemfdOptions::default().allow_sealing(true);
let mfd = opts.create("iov_deque")?;

// Resize to system page size.
mfd.as_file()
.set_len(Self::BYTES.try_into().unwrap())
.set_len(pages_bytes.try_into().unwrap())
.map_err(IovDequeError::MemfdResize)?;

// Add seals to prevent further resizing.
Expand Down Expand Up @@ -136,13 +130,13 @@ impl<const L: u16> IovDeque<L> {

/// Allocate memory for our ring buffer
///
/// This will allocate 2 * `Self::BYTES` bytes of virtual memory.
fn allocate_ring_buffer_memory() -> Result<*mut c_void, IovDequeError> {
/// This will allocate 2 * `pages_bytes` bytes of virtual memory.
fn allocate_ring_buffer_memory(pages_bytes: usize) -> Result<*mut c_void, IovDequeError> {
// SAFETY: We are calling the system call with valid arguments
unsafe {
Self::mmap(
std::ptr::null_mut(),
Self::BYTES * 2,
pages_bytes * 2,
libc::PROT_NONE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
Expand All @@ -151,18 +145,29 @@ impl<const L: u16> IovDeque<L> {
}
}

/// Calculate a number of bytes in full pages required for
/// the type to operate.
fn pages_bytes() -> usize {
let host_page_size = host_page_size();
let bytes = L as usize * std::mem::size_of::<iovec>();
let num_host_pages = bytes.div_ceil(host_page_size);
num_host_pages * host_page_size
}

/// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue.
pub fn new() -> Result<Self, IovDequeError> {
let memfd = Self::create_memfd()?;
let pages_bytes = Self::pages_bytes();

let memfd = Self::create_memfd(pages_bytes)?;
let raw_memfd = memfd.as_file().as_raw_fd();
let buffer = Self::allocate_ring_buffer_memory()?;
let buffer = Self::allocate_ring_buffer_memory(pages_bytes)?;

// Map the first page of virtual memory to the physical page described by the memfd object
// SAFETY: We are calling the system call with valid arguments
let _ = unsafe {
Self::mmap(
buffer,
Self::BYTES,
pages_bytes,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand All @@ -173,17 +178,17 @@ impl<const L: u16> IovDeque<L> {
// Map the second page of virtual memory to the physical page described by the memfd object
//
// SAFETY: This is safe because:
// * Both `buffer` and the result of `buffer.add(Self::BYTES)` are within bounds of the
// * Both `buffer` and the result of `buffer.add(pages_bytes)` are within bounds of the
// allocation we got from `Self::allocate_ring_buffer_memory`.
// * The resulting pointer is the beginning of the second page of our allocation, so it
// doesn't wrap around the address space.
let next_page = unsafe { buffer.add(Self::BYTES) };
let next_page = unsafe { buffer.add(pages_bytes) };

// SAFETY: We are calling the system call with valid arguments
let _ = unsafe {
Self::mmap(
next_page,
Self::BYTES,
pages_bytes,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_SHARED | libc::MAP_FIXED,
raw_memfd,
Expand Down Expand Up @@ -311,9 +316,10 @@ impl<const L: u16> IovDeque<L> {

impl<const L: u16> Drop for IovDeque<L> {
fn drop(&mut self) {
let pages_bytes = Self::pages_bytes();
// SAFETY: We are passing an address that we got from a previous allocation of `2 *
// Self::BYTES` bytes by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), Self::BYTES * 2) };
// pages_bytes` by calling mmap
let _ = unsafe { libc::munmap(self.iov.cast(), 2 * pages_bytes) };
}
}

Expand All @@ -331,6 +337,18 @@ mod tests {
assert_eq!(deque.len(), 0);
}

#[test]
fn test_new_less_than_page() {
let deque = super::IovDeque::<128>::new().unwrap();
assert_eq!(deque.len(), 0);
}

#[test]
fn test_new_more_than_page() {
let deque = super::IovDeque::<512>::new().unwrap();
assert_eq!(deque.len(), 0);
}

fn make_iovec(id: u16, len: u16) -> iovec {
iovec {
iov_base: id as *mut libc::c_void,
Expand Down
6 changes: 3 additions & 3 deletions src/vmm/src/devices/virtio/iovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -815,13 +815,13 @@ mod verification {
use vm_memory::VolatileSlice;

use super::IoVecBuffer;
use crate::arch::GUEST_PAGE_SIZE;
use crate::devices::virtio::iov_deque::IovDeque;
// Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise
// Rust will not know what to do.
type IoVecBufferMutDefault = super::IoVecBufferMut<FIRECRACKER_MAX_QUEUE_SIZE>;
type IovDequeDefault = IovDeque<FIRECRACKER_MAX_QUEUE_SIZE>;

use crate::arch::PAGE_SIZE;
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;

// Maximum memory size to use for our buffers. For the time being 1KB.
Expand Down Expand Up @@ -912,8 +912,8 @@ mod verification {
// SAFETY: safe because the layout has non-zero size
let mem = unsafe {
std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked(
2 * PAGE_SIZE,
PAGE_SIZE,
2 * GUEST_PAGE_SIZE,
GUEST_PAGE_SIZE,
))
};
IovDequeDefault {
Expand Down
6 changes: 3 additions & 3 deletions src/vmm/src/gdb/target.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use vm_memory::{Bytes, GuestAddress, GuestMemoryError};
use super::arch;
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::vcpu::VcpuError as AarchVcpuError;
use crate::arch::PAGE_SIZE;
use crate::arch::GUEST_PAGE_SIZE;
use crate::logger::{error, info};
use crate::utils::u64_to_usize;
use crate::vstate::vcpu::VcpuSendEventError;
Expand Down Expand Up @@ -396,7 +396,7 @@ impl MultiThreadBase for FirecrackerTarget {
// Compute the amount space left in the page after the gpa
let read_len = std::cmp::min(
data.len(),
PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)),
GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)),
);

vmm.guest_memory()
Expand Down Expand Up @@ -430,7 +430,7 @@ impl MultiThreadBase for FirecrackerTarget {
// Compute the amount space left in the page after the gpa
let write_len = std::cmp::min(
data.len(),
PAGE_SIZE - (u64_to_usize(gpa) & (PAGE_SIZE - 1)),
GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)),
);

vmm.guest_memory()
Expand Down
2 changes: 1 addition & 1 deletion src/vmm/src/vstate/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ pub(crate) mod tests {
let res = vm.set_kvm_memory_regions(&gm, false);
res.unwrap();

// Trying to set a memory region with a size that is not a multiple of PAGE_SIZE
// Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
// will result in error.
let gm = single_region_mem(0x10);
let res = vm.set_kvm_memory_regions(&gm, false);
Expand Down
Loading