Skip to content

fix(snapshot/x86_64): make sure TSC_DEADLINE MSR is non-zero #4618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ and this project adheres to
UFFD support not being forward-compatible with new ioctl options introduced in
Linux 6.6. See also
https://github.com/bytecodealliance/userfaultfd-rs/issues/61.
- [#4618](https://github.com/firecracker-microvm/firecracker/pull/4618): On
x86_64, when taking a snapshot, if a vCPU has MSR_IA32_TSC_DEADLINE set to 0,
Firecracker will replace it with the MSR_IA32_TSC value from the same vCPU.
This is to guarantee that the vCPU will continue receiving TSC interrupts
after restoring from the snapshot even if an interrupt is lost when taking a
snapshot.

## \[1.7.0\]

Expand Down
5 changes: 5 additions & 0 deletions docs/snapshotting/snapshot-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ The snapshot functionality is still in developer preview due to the following:
the data store is not persisted across snapshots.
- Configuration information for metrics and logs are not saved to the snapshot.
These need to be reconfigured on the restored microVM.
- On x86_64, if a vCPU has MSR_IA32_TSC_DEADLINE set to 0 when a snapshot is
taken, Firecracker replaces it with the MSR_IA32_TSC value from the same vCPU.
This is to guarantee that the vCPU will continue receiving TSC interrupts
after restoring from the snapshot even if an interrupt is lost when taking a
snapshot.

## Snapshot versioning

Expand Down
4 changes: 3 additions & 1 deletion src/cpu-template-helper/src/template/dump/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ fn msrs_to_modifier(msrs: &HashMap<u32, u64>) -> Vec<RegisterModifier> {
// Fireracker diables some features (e.g., PMU) and doesn't support some features (e.g., Hyper-V),
// MSRs related to such features are not useful as CPU configuration dump. Excluding such MSRs
// reduces maintenance cost when KVM makes change their default values.
const MSR_EXCLUSION_LIST: [MsrRange; 9] = [
const MSR_EXCLUSION_LIST: [MsrRange; 10] = [
// - MSR_IA32_TSC (0x10): vary depending on the elapsed time.
MSR_RANGE!(MSR_IA32_TSC),
// - MSR_IA32_TSC_DEADLINE (0x6e0): varies depending on the elapsed time.
MSR_RANGE!(MSR_IA32_TSC_DEADLINE),
// Firecracker doesn't support MCE.
// - MSR_IA32_MCG_STATUS (0x17a)
// - MSR_IA32_MCG_EXT_CTL (0x4d0)
Expand Down
110 changes: 110 additions & 0 deletions src/vmm/src/vstate/vcpu/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use serde::{Deserialize, Serialize};
use crate::arch::x86_64::interrupts;
use crate::arch::x86_64::msr::{create_boot_msr_entries, MsrError};
use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError};
use crate::arch_gen::x86::msr_index::{MSR_IA32_TSC, MSR_IA32_TSC_DEADLINE};
use crate::cpu_config::x86_64::{cpuid, CpuConfiguration};
use crate::logger::{IncMetric, METRICS};
use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap};
Expand Down Expand Up @@ -282,6 +283,39 @@ impl KvmVcpu {
Ok(cpuid)
}

/// If the IA32_TSC_DEADLINE MSR value is zero, update it
/// with the IA32_TSC value to guarantee that
/// the vCPU will continue receiving interrupts after restoring from a snapshot.
///
/// Rationale: we observed that sometimes when taking a snapshot,
/// the IA32_TSC_DEADLINE MSR is cleared, but the interrupt is not
/// delivered to the guest, leading to a situation where one
/// of the vCPUs never receives TSC interrupts after restoring,
/// until the MSR is updated externally, eg by setting the system time.
fn fix_zero_tsc_deadline_msr(msr_chunks: &mut [Msrs]) {
// We do not expect more than 1 TSC MSR entry, but if there are multiple, pick the maximum.
let max_tsc_value = msr_chunks
.iter()
.flat_map(|msrs| msrs.as_slice())
.filter(|msr| msr.index == MSR_IA32_TSC)
.map(|msr| msr.data)
.max();

if let Some(tsc_value) = max_tsc_value {
msr_chunks
.iter_mut()
.flat_map(|msrs| msrs.as_mut_slice())
.filter(|msr| msr.index == MSR_IA32_TSC_DEADLINE && msr.data == 0)
.for_each(|msr| {
warn!(
"MSR_IA32_TSC_DEADLINE is 0, replacing with {:x}.",
tsc_value
);
msr.data = tsc_value;
});
}
}

/// Get MSR chunks for the given MSR index list.
///
/// KVM only supports getting `KVM_MAX_MSR_ENTRIES` at a time, so we divide
Expand Down Expand Up @@ -321,6 +355,8 @@ impl KvmVcpu {
msr_chunks.push(msrs);
}

Self::fix_zero_tsc_deadline_msr(&mut msr_chunks);

Ok(msr_chunks)
}

Expand Down Expand Up @@ -594,6 +630,7 @@ mod tests {

use std::os::unix::io::AsRawFd;

use kvm_bindings::kvm_msr_entry;
use kvm_ioctls::Cap;

use super::*;
Expand Down Expand Up @@ -949,4 +986,77 @@ mod tests {
}
}
}

fn msrs_from_entries(msr_entries: &[(u32, u64)]) -> Msrs {
Msrs::from_entries(
&msr_entries
.iter()
.map(|&(index, data)| kvm_msr_entry {
index,
data,
..Default::default()
})
.collect::<Vec<_>>(),
)
.unwrap()
}

fn assert_msrs(msr_chunks: &[Msrs], expected_msr_entries: &[(u32, u64)]) {
let flattened_msrs = msr_chunks.iter().flat_map(|msrs| msrs.as_slice());
for (a, b) in flattened_msrs.zip(expected_msr_entries.iter()) {
assert_eq!(a.index, b.0);
assert_eq!(a.data, b.1);
}
}

#[test]
fn test_fix_zero_tsc_deadline_msr_zero_same_chunk() {
// Place both TSC and TSC_DEADLINE MSRs in the same chunk.
let mut msr_chunks = [msrs_from_entries(&[
(MSR_IA32_TSC_DEADLINE, 0),
(MSR_IA32_TSC, 42),
])];

KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks);

// We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value.
assert_msrs(
&msr_chunks,
&[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)],
);
}

#[test]
fn test_fix_zero_tsc_deadline_msr_zero_separate_chunks() {
// Place both TSC and TSC_DEADLINE MSRs in separate chunks.
let mut msr_chunks = [
msrs_from_entries(&[(MSR_IA32_TSC_DEADLINE, 0)]),
msrs_from_entries(&[(MSR_IA32_TSC, 42)]),
];

KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks);

// We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value.
assert_msrs(
&msr_chunks,
&[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)],
);
}

#[test]
fn test_fix_zero_tsc_deadline_msr_non_zero() {
let mut msr_chunks = [msrs_from_entries(&[
(MSR_IA32_TSC_DEADLINE, 1),
(MSR_IA32_TSC, 2),
])];

KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks);

// We expect that MSR_IA32_TSC_DEADLINE should remain unchanged, because it is non-zero
// already.
assert_msrs(
&msr_chunks,
&[(MSR_IA32_TSC_DEADLINE, 1), (MSR_IA32_TSC, 2)],
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1276,10 +1276,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1426,10 +1426,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1472,10 +1472,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -962,10 +962,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1158,10 +1158,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1158,10 +1158,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -916,10 +916,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1250,10 +1250,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1273,10 +1273,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0x4b564d00",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -962,10 +962,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1158,10 +1158,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1158,10 +1158,6 @@
"addr": "0x277",
"bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110"
},
{
"addr": "0x6e0",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
},
{
"addr": "0xd90",
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000"
Expand Down
Loading