Merge pull request #581 from msft-jlange/sipi
platform/native: use SIPI for AP startup
joergroedel authored Jan 21, 2025
2 parents 9fcdaa2 + 1adc89c commit a5b1ee5
Showing 11 changed files with 279 additions and 19 deletions.
19 changes: 19 additions & 0 deletions bootlib/src/kernel_launch.rs
@@ -79,3 +79,22 @@ pub struct Stage2LaunchInfo {
pub igvm_params: u32,
pub _reserved: u32,
}

#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct ApStartContext {
// All fields of this context must remain in the same order because they
// are referenced from assembly.
pub cr0: usize,
pub cr3: usize,
pub cr4: usize,
pub efer: usize,
pub start_rip: usize,
pub rsp: usize,
pub initial_rip: usize,
pub transition_cr3: u32,
pub context_size: u32,
}

// The SIPI stub is placed immediately below the stage 2 heap area.
pub const SIPI_STUB_GPA: u32 = 0xF000;
7 changes: 7 additions & 0 deletions igvmbuilder/src/igvm_builder.rs
@@ -28,6 +28,7 @@ use crate::cpuid::SnpCpuidPage;
use crate::firmware::{parse_firmware, Firmware};
use crate::paging::construct_init_page_tables;
use crate::platform::PlatformMask;
use crate::sipi::add_sipi_stub;
use crate::stage2_stack::Stage2Stack;
use crate::vmsa::{construct_native_start_context, construct_start_context, construct_vmsa};
use crate::GpaMap;
@@ -527,6 +528,12 @@ impl IgvmBuilder {
);
}

// If the target includes a non-isolated platform, then insert the
// SIPI startup stub.
if COMPATIBILITY_MASK.contains(ANY_NATIVE_COMPATIBILITY_MASK) {
add_sipi_stub(ANY_NATIVE_COMPATIBILITY_MASK, &mut self.directives);
}

Ok(())
}

1 change: 1 addition & 0 deletions igvmbuilder/src/main.rs
@@ -18,6 +18,7 @@ mod igvm_firmware;
mod ovmf_firmware;
mod paging;
mod platform;
mod sipi;
mod stage2_stack;
mod vmsa;

88 changes: 88 additions & 0 deletions igvmbuilder/src/sipi.rs
@@ -0,0 +1,88 @@
// SPDX-License-Identifier: MIT OR Apache-2.0
//
// Copyright (c) 2024 Microsoft Corporation
//
// Author: Jon Lange <[email protected]>

use bootlib::kernel_launch::SIPI_STUB_GPA;
use igvm::IgvmDirectiveHeader;
use igvm_defs::{IgvmPageDataFlags, IgvmPageDataType, PAGE_SIZE_4K};

pub fn add_sipi_stub(compatibility_mask: u32, directives: &mut Vec<IgvmDirectiveHeader>) {
// The SIPI stub is the code that is required on native platforms to
// transition the processor out of real mode and into 64-bit mode when APs
// are started. It includes 16-bit code, 32-bit code, and 64-bit code.
// For simplicity, to avoid having to invoke multiple build elements to
// produce a number of separate, small code modules that are stitched
// together, this routine (somewhat awkwardly) simply captures the
// required code bytes as a constant array, since this code is small and
// will almost never change. The assembly code and corresponding
// disassembly are listed here for reference.
//
// F000: 0F 20 C0 mov eax, cr0
// F003: 80 C8 01 or al, 1
// F006: 0F 22 C0 mov cr0, eax
// F009: 2E 66 0F 01 16 1A 00 lgdt cs:[001A]
// F010: EA 40 F0 08 00 jmp 0008:F040
// F015: CC int 3
// F016: CC int 3
// F017: CC int 3
// F018: CC int 3
// F019: CC int 3
// F01A: 1F 00 20 F0 00 00
//
// GDT:
// F020: 00 00 00 00 00 00 00 00 // null selector
// F028: FF FF 00 00 00 9B CF 00 // 32-bit code
// F030: FF FF 00 00 00 9B AF 00 // 64-bit code
// F038: FF FF 00 00 00 93 CF 00 // data
//
// F040: 66 B8 18 00 mov ax, 18h
// F044: 8E D8 mov ds, ax
// F046: 8E D0 mov ss, ax
// F048: 8E C0 mov es, ax
// F04A: 8B 05 F8 FF 00 00 mov eax, [FFF8] // page table
// F050: 0F 22 D8 mov cr3, eax
// F053: B9 80 00 00 C0 mov ecx, C0000080h
// F058: 0F 32 rdmsr
// F05A: 0F BA E8 08 bts eax, 8 // EFER_LME
// F05E: 0F 30 wrmsr
// F060: 0F 20 E0 mov eax, cr4
// F063: 83 C8 20 or eax, 20h // CR4_PAE
// F066: 0F 22 E0 mov cr4, eax
// F069: 0F 20 C0 mov eax, cr0
// F06C: 0F BA E8 1F bts eax, 1Fh
// F070: 0F 22 C0 mov cr0, eax // CR0_PG
// F073: BF 00 00 01 00 mov edi, 10000
// F078: 2B 3D FC FF 00 00 sub edi, [FFFC] // context size
// F07E: EA 85 F0 00 00 10 00 jmp 0010:F085
// F085: FF 25 65 0F 00 00 jmp [FFF0] // start routine
// F08B:

let code_bytes: &[u8] = &[
0x0F, 0x20, 0xC0, 0x80, 0xC8, 0x01, 0x0F, 0x22, 0xC0, 0x2E, 0x66, 0x0F, 0x01, 0x16, 0x1A,
0x00, 0xEA, 0x40, 0xF0, 0x08, 0x00, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0x1F, 0x00, 0x20, 0xF0,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00,
0x9B, 0xCF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x9B, 0xAF, 0x00, 0xFF, 0xFF, 0x00, 0x00,
0x00, 0x93, 0xCF, 0x00, 0x66, 0xB8, 0x18, 0x00, 0x8E, 0xD8, 0x8E, 0xD0, 0x8E, 0xC0, 0x8B,
0x05, 0xF8, 0xFF, 0x00, 0x00, 0x0F, 0x22, 0xD8, 0xB9, 0x80, 0x00, 0x00, 0xC0, 0x0F, 0x32,
0x0F, 0xBA, 0xE8, 0x08, 0x0F, 0x30, 0x0F, 0x20, 0xE0, 0x83, 0xC8, 0x20, 0x0F, 0x22, 0xE0,
0x0F, 0x20, 0xC0, 0x0F, 0xBA, 0xE8, 0x1F, 0x0F, 0x22, 0xC0, 0xBF, 0x00, 0x00, 0x01, 0x00,
0x2B, 0x3D, 0xFC, 0xFF, 0x00, 0x00, 0xEA, 0x85, 0xF0, 0x00, 0x00, 0x10, 0x00, 0xFF, 0x25,
0x65, 0x0F, 0x00, 0x00,
];

let mut page_data = Vec::<u8>::new();
page_data.extend_from_slice(code_bytes);

// Fill the remainder of the page with INT 3.
page_data.resize(PAGE_SIZE_4K.try_into().unwrap(), 0xCC);

directives.push(IgvmDirectiveHeader::PageData {
gpa: SIPI_STUB_GPA as u64,
compatibility_mask,
flags: IgvmPageDataFlags::new(),
data_type: IgvmPageDataType::NORMAL,
data: page_data,
});
}
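
The fixed addresses referenced in the disassembly above (FFF0, FFF8, FFFC) are the last fields of an ApStartContext placed at the top of this same page, so that the structure ends exactly at the 64 KiB boundary. A minimal sketch of that layout arithmetic, using the field order of ApStartContext from bootlib/src/kernel_launch.rs; the constants are editorial and not part of the change:

use bootlib::kernel_launch::ApStartContext;
use core::mem::size_of;

// On x86-64, ApStartContext is seven usize fields plus two u32s = 64 bytes,
// so the context starts at 0x10000 - 64 = 0xFFC0.
const CONTEXT_BASE: usize = 0x10000 - size_of::<ApStartContext>();
const INITIAL_RIP_ADDR: usize = CONTEXT_BASE + 6 * 8;     // 0xFFF0 -> jmp [FFF0]
const TRANSITION_CR3_ADDR: usize = CONTEXT_BASE + 7 * 8;  // 0xFFF8 -> mov eax, [FFF8]
const CONTEXT_SIZE_ADDR: usize = TRANSITION_CR3_ADDR + 4; // 0xFFFC -> sub edi, [FFFC]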
12 changes: 12 additions & 0 deletions kernel/src/cpu/control_regs.rs
@@ -111,6 +111,12 @@ bitflags! {
}
}

impl From<usize> for CR0Flags {
fn from(bits: usize) -> Self {
CR0Flags::from_bits_truncate(bits as u64)
}
}

#[inline]
pub fn read_cr0() -> CR0Flags {
let cr0: u64;
@@ -214,6 +220,12 @@ bitflags! {
}
}

impl From<usize> for CR4Flags {
fn from(bits: usize) -> Self {
CR4Flags::from_bits_truncate(bits as u64)
}
}

#[inline]
pub fn read_cr4() -> CR4Flags {
let cr4: u64;
6 changes: 6 additions & 0 deletions kernel/src/cpu/efer.rs
@@ -32,3 +32,9 @@ pub fn write_efer(efer: EFERFlags) {
let val = efer.bits();
write_msr(EFER, val);
}

impl From<usize> for EFERFlags {
fn from(bits: usize) -> Self {
EFERFlags::from_bits_truncate(bits as u64)
}
}
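
These From<usize> conversions mirror the usize fields of ApStartContext; presumably the AP startup path (in files not shown in this excerpt) uses them to turn the raw context values back into typed flags. A hypothetical call site inside the kernel crate, for illustration only; only the flag types and their From<usize> impls come from the patch above:

use crate::cpu::control_regs::{CR0Flags, CR4Flags};
use crate::cpu::efer::EFERFlags;
use bootlib::kernel_launch::ApStartContext;

// Illustrative helper: convert the raw register values carried in an
// ApStartContext into the typed flag representations.
fn context_flags(ctx: &ApStartContext) -> (CR0Flags, CR4Flags, EFERFlags) {
    (
        CR0Flags::from(ctx.cr0),
        CR4Flags::from(ctx.cr4),
        EFERFlags::from(ctx.efer),
    )
}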
39 changes: 29 additions & 10 deletions kernel/src/cpu/idt/common.rs
@@ -20,6 +20,7 @@ use crate::types::{Bytes, SVSM_CS};
use alloc::boxed::Box;
use core::arch::{asm, global_asm};
use core::mem;
use core::ops::Deref;

pub const DE_VECTOR: usize = 0;
pub const DB_VECTOR: usize = 1;
@@ -339,6 +340,23 @@ impl IDT {

self
}

/// Load an IDT.
/// # Safety
/// The caller must guarantee that the IDT has a static lifetime so that
/// its entries are always available to the CPU.
pub unsafe fn load(&self) {
let desc: IdtDesc = IdtDesc {
size: (IDT_ENTRIES * 16) as u16,
address: VirtAddr::from(self.entries.as_ptr()),
};

// SAFETY: Inline assembly to load an IDT. `'static` lifetime ensures
// that address is always available for the CPU.
unsafe {
asm!("lidt (%rax)", in("rax") &desc, options(att_syntax));
}
}
}

impl Default for IDT {
@@ -348,23 +366,24 @@ impl Default for IDT {
}

impl WriteLockGuard<'static, IDT> {
/// Load an IDT. Its lifetime must be static so that its entries are
/// always available to the CPU.
pub fn load(&self) {
let desc: IdtDesc = IdtDesc {
size: (IDT_ENTRIES * 16) as u16,
address: VirtAddr::from(self.entries.as_ptr()),
};

// SAFETY: Inline assembly to load an IDT. `'static` lifetime ensures
// that address is always available for the CPU.
// SAFETY: the lifetime of the lock guard is static, so the safety
// requirements of IDT::load are met.
unsafe {
asm!("lidt (%rax)", in("rax") &desc, options(att_syntax));
self.deref().load();
}
}
}

impl ReadLockGuard<'static, IDT> {
pub fn load(&self) {
// SAFETY: the lifetime of the lock guard is static, so the safety
// requirements of IDT::load are met.
unsafe {
self.deref().load();
}
}

pub fn base_limit(&self) -> (u64, u16) {
let base: *const IDT = core::ptr::from_ref(self);
let limit = (IDT_ENTRIES * mem::size_of::<IdtEntry>()) as u16;
8 changes: 6 additions & 2 deletions kernel/src/cpu/percpu.rs
@@ -876,10 +876,14 @@ impl PerCpu {
Ok(())
}

pub fn load_tss(&self) {
pub fn load_gdt_tss(&self, init_gdt: bool) {
// Create a temporary GDT to use to configure the TSS.
let mut gdt = GDT::new();
gdt.load();
// Load the GDT selectors if requested.
if init_gdt {
gdt.load_selectors();
}
gdt.load_tss(&self.tss);
}

@@ -892,7 +896,12 @@
// SAFETY: along with the page table we are also uploading the right
// TSS and ISST to ensure a memory safe execution state
unsafe { self.get_pgtable().load() };
self.load_tss();
self.load_gdt_tss(false);
if is_cet_ss_supported() {
self.load_isst();
}
71 changes: 69 additions & 2 deletions kernel/src/cpu/smp.rs
@@ -8,18 +8,22 @@ extern crate alloc;

use crate::acpi::tables::ACPICPUInfo;
use crate::address::Address;
use crate::cpu::idt::idt;
use crate::cpu::percpu::{this_cpu, this_cpu_shared, PerCpu};
use crate::cpu::shadow_stack::{is_cet_ss_supported, SCetFlags, MODE_64BIT, S_CET};
use crate::cpu::sse::sse_init;
use crate::enable_shadow_stacks;
use crate::error::SvsmError;
use crate::platform::SvsmPlatform;
use crate::platform::SVSM_PLATFORM;
use crate::hyperv;
use crate::platform::{SvsmPlatform, SVSM_PLATFORM};
use crate::requests::{request_loop, request_processing_main};
use crate::task::{schedule_init, start_kernel_task};
use crate::utils::immut_after_init::immut_after_init_set_multithreaded;

use alloc::string::String;
use bootlib::kernel_launch::ApStartContext;
use core::arch::global_asm;
use core::mem;

fn start_cpu(platform: &dyn SvsmPlatform, apic_id: u32) -> Result<(), SvsmError> {
let start_rip: u64 = (start_ap as *const u8) as u64;
@@ -46,6 +50,69 @@ pub fn start_secondary_cpus(platform: &dyn SvsmPlatform, cpus: &[ACPICPUInfo]) {
log::info!("Brought {} AP(s) online", count);
}

#[no_mangle]
fn start_ap_setup() {
// Initialize the GDT, TSS, and IDT.
this_cpu().load_gdt_tss(true);
idt().load();
}

extern "C" {
fn start_ap_indirect();
}

global_asm!(
r#"
.globl start_ap_indirect
start_ap_indirect:
/* Load fields from the context structure */
movq (%rdi), %r8 /* CR0 */
movq 8(%rdi), %r9 /* CR3 */
movq 16(%rdi), %r10 /* CR4 */
movl 24(%rdi), %eax /* Low bits of EFER */
movl 28(%rdi), %edx /* High bits of EFER */
movq 32(%rdi), %r11 /* Start RIP */
movq 40(%rdi), %rsp /* Initial RSP */
/* Switch to the target environment. This will remove the transition
* environment and context structure from the address space. */
movq %r8, %cr0
movq %r10, %cr4
movl $0xC0000080, %ecx /* EFER */
wrmsr
movq %r9, %cr3
/* Save the start RIP on the stack. */
pushq %r11
/* Call a startup function to complete setup in the local
* environment. */
call start_ap_setup
/* Begin execution from the starting RIP, which is at the top of the
* stack. */
ret
"#,
options(att_syntax)
);

pub fn create_ap_start_context(
initial_context: &hyperv::HvInitialVpContext,
transition_cr3: u32,
) -> ApStartContext {
ApStartContext {
cr0: initial_context.cr0.try_into().unwrap(),
cr3: initial_context.cr3.try_into().unwrap(),
cr4: initial_context.cr4.try_into().unwrap(),
efer: initial_context.efer.try_into().unwrap(),
start_rip: initial_context.rip.try_into().unwrap(),
rsp: initial_context.rsp.try_into().unwrap(),
transition_cr3,
initial_rip: start_ap_indirect as usize,
context_size: mem::size_of::<ApStartContext>() as u32,
}
}

#[no_mangle]
fn start_ap() {
let percpu = this_cpu();
(Diff truncated; remaining changed files not loaded.)
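
The native platform's AP bring-up, which is part of the diffs not loaded above, ties these pieces together: it copies the ApStartContext produced by create_ap_start_context to the top of the page at SIPI_STUB_GPA, where the stub expects to find it, and then delivers INIT followed by a SIPI whose vector is the stub's page number (SIPI_STUB_GPA >> 12 = 0x0F, since a SIPI starts execution at vector << 12). A rough, hypothetical sketch of the context placement (not the actual implementation; only ApStartContext and SIPI_STUB_GPA come from the patch):

use bootlib::kernel_launch::{ApStartContext, SIPI_STUB_GPA};
use core::mem::size_of;

/// # Safety
/// `sipi_page` must be a writable mapping of the 4 KiB page at SIPI_STUB_GPA.
unsafe fn install_ap_context(sipi_page: *mut u8, context: &ApStartContext) {
    // The stub expects the context to end exactly at the 64 KiB boundary,
    // i.e. at the end of the SIPI page, so place it at offset 0x1000 - size.
    let offset = 0x1000 - size_of::<ApStartContext>();
    // SAFETY: the caller guarantees `sipi_page` maps the full SIPI page.
    unsafe {
        sipi_page
            .add(offset)
            .cast::<ApStartContext>()
            .write_unaligned(*context);
    }
}

// The AP is then started with INIT followed by a SIPI whose vector is the
// stub's page number, e.g. (hypothetical APIC helper):
//   send_init_sipi(apic_id, (SIPI_STUB_GPA >> 12) as u8); // vector 0x0F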
